| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.2809913726867605, |
| "eval_steps": 100, |
| "global_step": 3200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0008780980396461265, |
| "grad_norm": 6.670812129974365, |
| "learning_rate": 1.0000000000000001e-07, |
| "logits/chosen": 4.2431488037109375, |
| "logits/rejected": 4.231738567352295, |
| "logps/chosen": -9.991304397583008, |
| "logps/rejected": -10.524327278137207, |
| "loss": 3.0309, |
| "nll_loss": 2.3641905784606934, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -2.99739146232605, |
| "rewards/margins": 0.15990665555000305, |
| "rewards/rejected": -3.1572983264923096, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.001756196079292253, |
| "grad_norm": 10.682082176208496, |
| "learning_rate": 2.1111111111111113e-07, |
| "logits/chosen": 4.309535980224609, |
| "logits/rejected": 4.361606597900391, |
| "logps/chosen": -9.786101341247559, |
| "logps/rejected": -10.518722534179688, |
| "loss": 3.1598, |
| "nll_loss": 2.5160868167877197, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.9358303546905518, |
| "rewards/margins": 0.2197863757610321, |
| "rewards/rejected": -3.1556167602539062, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0026342941189383797, |
| "grad_norm": 15.04028034210205, |
| "learning_rate": 3.2222222222222227e-07, |
| "logits/chosen": 4.225083351135254, |
| "logits/rejected": 4.215059757232666, |
| "logps/chosen": -9.968446731567383, |
| "logps/rejected": -10.634344100952148, |
| "loss": 2.9674, |
| "nll_loss": 2.318664073944092, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -2.9905343055725098, |
| "rewards/margins": 0.19976934790611267, |
| "rewards/rejected": -3.1903038024902344, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.003512392158584506, |
| "grad_norm": 10.793362617492676, |
| "learning_rate": 4.333333333333334e-07, |
| "logits/chosen": 4.328730583190918, |
| "logits/rejected": 4.321128845214844, |
| "logps/chosen": -9.762226104736328, |
| "logps/rejected": -10.319639205932617, |
| "loss": 2.7628, |
| "nll_loss": 2.110645294189453, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -2.928668260574341, |
| "rewards/margins": 0.1672237515449524, |
| "rewards/rejected": -3.0958914756774902, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.004390490198230633, |
| "grad_norm": 6.863041400909424, |
| "learning_rate": 5.444444444444444e-07, |
| "logits/chosen": 4.535063743591309, |
| "logits/rejected": 4.555140495300293, |
| "logps/chosen": -10.173809051513672, |
| "logps/rejected": -10.436089515686035, |
| "loss": 2.7587, |
| "nll_loss": 2.0572845935821533, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -3.052142858505249, |
| "rewards/margins": 0.07868396490812302, |
| "rewards/rejected": -3.130826711654663, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.005268588237876759, |
| "grad_norm": 11.63379955291748, |
| "learning_rate": 6.555555555555556e-07, |
| "logits/chosen": 4.210951805114746, |
| "logits/rejected": 4.193416595458984, |
| "logps/chosen": -9.710775375366211, |
| "logps/rejected": -10.588180541992188, |
| "loss": 3.023, |
| "nll_loss": 2.409853458404541, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -2.9132332801818848, |
| "rewards/margins": 0.2632210850715637, |
| "rewards/rejected": -3.1764540672302246, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.006146686277522885, |
| "grad_norm": 4.952016830444336, |
| "learning_rate": 7.666666666666667e-07, |
| "logits/chosen": 4.56010627746582, |
| "logits/rejected": 4.571717739105225, |
| "logps/chosen": -9.788490295410156, |
| "logps/rejected": -10.214717864990234, |
| "loss": 2.6203, |
| "nll_loss": 1.9469432830810547, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -2.936546802520752, |
| "rewards/margins": 0.12786847352981567, |
| "rewards/rejected": -3.064415454864502, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.007024784317169012, |
| "grad_norm": 10.809489250183105, |
| "learning_rate": 8.777777777777778e-07, |
| "logits/chosen": 4.2120256423950195, |
| "logits/rejected": 4.222228050231934, |
| "logps/chosen": -9.806170463562012, |
| "logps/rejected": -10.296957969665527, |
| "loss": 3.3042, |
| "nll_loss": 2.644357681274414, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -2.9418509006500244, |
| "rewards/margins": 0.1472366452217102, |
| "rewards/rejected": -3.08908748626709, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.007902882356815138, |
| "grad_norm": 9.27618408203125, |
| "learning_rate": 9.88888888888889e-07, |
| "logits/chosen": 4.336479187011719, |
| "logits/rejected": 4.3232035636901855, |
| "logps/chosen": -9.828582763671875, |
| "logps/rejected": -10.485132217407227, |
| "loss": 3.1223, |
| "nll_loss": 2.4759140014648438, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -2.948575496673584, |
| "rewards/margins": 0.1969645768404007, |
| "rewards/rejected": -3.1455399990081787, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.008780980396461266, |
| "grad_norm": 19.710798263549805, |
| "learning_rate": 1.1e-06, |
| "logits/chosen": 4.149864673614502, |
| "logits/rejected": 4.147267818450928, |
| "logps/chosen": -9.967988014221191, |
| "logps/rejected": -10.663251876831055, |
| "loss": 2.9038, |
| "nll_loss": 2.2608590126037598, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.990396499633789, |
| "rewards/margins": 0.20857906341552734, |
| "rewards/rejected": -3.1989755630493164, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.009659078436107391, |
| "grad_norm": 10.258548736572266, |
| "learning_rate": 1.2111111111111111e-06, |
| "logits/chosen": 4.241273880004883, |
| "logits/rejected": 4.249630928039551, |
| "logps/chosen": -9.708308219909668, |
| "logps/rejected": -10.221686363220215, |
| "loss": 3.2698, |
| "nll_loss": 2.603203535079956, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -2.912492513656616, |
| "rewards/margins": 0.15401321649551392, |
| "rewards/rejected": -3.0665059089660645, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.010537176475753519, |
| "grad_norm": 3.6609084606170654, |
| "learning_rate": 1.3222222222222222e-06, |
| "logits/chosen": 4.420263290405273, |
| "logits/rejected": 4.413485527038574, |
| "logps/chosen": -9.733232498168945, |
| "logps/rejected": -10.436403274536133, |
| "loss": 2.5418, |
| "nll_loss": 1.9018337726593018, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.9199700355529785, |
| "rewards/margins": 0.21095120906829834, |
| "rewards/rejected": -3.1309211254119873, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.011415274515399644, |
| "grad_norm": 10.068683624267578, |
| "learning_rate": 1.4333333333333335e-06, |
| "logits/chosen": 4.42335319519043, |
| "logits/rejected": 4.493862152099609, |
| "logps/chosen": -9.73039722442627, |
| "logps/rejected": -10.323850631713867, |
| "loss": 2.5285, |
| "nll_loss": 1.8774116039276123, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -2.919119358062744, |
| "rewards/margins": 0.17803625762462616, |
| "rewards/rejected": -3.0971553325653076, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.01229337255504577, |
| "grad_norm": 11.362488746643066, |
| "learning_rate": 1.5444444444444446e-06, |
| "logits/chosen": 4.332414150238037, |
| "logits/rejected": 4.329668045043945, |
| "logps/chosen": -9.279766082763672, |
| "logps/rejected": -9.99905014038086, |
| "loss": 2.7181, |
| "nll_loss": 2.0923380851745605, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.7839295864105225, |
| "rewards/margins": 0.21578574180603027, |
| "rewards/rejected": -2.9997153282165527, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.013171470594691898, |
| "grad_norm": 14.254817008972168, |
| "learning_rate": 1.6555555555555559e-06, |
| "logits/chosen": 4.319321155548096, |
| "logits/rejected": 4.321578502655029, |
| "logps/chosen": -9.059564590454102, |
| "logps/rejected": -9.602411270141602, |
| "loss": 2.8514, |
| "nll_loss": 2.1910147666931152, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -2.717869758605957, |
| "rewards/margins": 0.16285373270511627, |
| "rewards/rejected": -2.880723476409912, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.014049568634338023, |
| "grad_norm": 4.008646488189697, |
| "learning_rate": 1.7666666666666668e-06, |
| "logits/chosen": 4.2461042404174805, |
| "logits/rejected": 4.25100040435791, |
| "logps/chosen": -8.857942581176758, |
| "logps/rejected": -9.37680435180664, |
| "loss": 2.5006, |
| "nll_loss": 1.839271903038025, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -2.6573829650878906, |
| "rewards/margins": 0.15565846860408783, |
| "rewards/rejected": -2.8130412101745605, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.01492766667398415, |
| "grad_norm": 10.644233703613281, |
| "learning_rate": 1.8777777777777778e-06, |
| "logits/chosen": 4.304837226867676, |
| "logits/rejected": 4.3417558670043945, |
| "logps/chosen": -8.620405197143555, |
| "logps/rejected": -9.385229110717773, |
| "loss": 2.5802, |
| "nll_loss": 1.932861328125, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -2.5861220359802246, |
| "rewards/margins": 0.22944733500480652, |
| "rewards/rejected": -2.8155694007873535, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.015805764713630276, |
| "grad_norm": 9.301039695739746, |
| "learning_rate": 1.988888888888889e-06, |
| "logits/chosen": 4.26754903793335, |
| "logits/rejected": 4.271862030029297, |
| "logps/chosen": -8.370404243469238, |
| "logps/rejected": -8.689592361450195, |
| "loss": 2.4808, |
| "nll_loss": 1.7635695934295654, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -2.5111212730407715, |
| "rewards/margins": 0.09575649350881577, |
| "rewards/rejected": -2.6068778038024902, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.016683862753276404, |
| "grad_norm": 6.643105506896973, |
| "learning_rate": 2.1000000000000002e-06, |
| "logits/chosen": 4.255660533905029, |
| "logits/rejected": 4.268857955932617, |
| "logps/chosen": -7.950819492340088, |
| "logps/rejected": -8.541925430297852, |
| "loss": 2.2629, |
| "nll_loss": 1.6152054071426392, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -2.3852455615997314, |
| "rewards/margins": 0.17733201384544373, |
| "rewards/rejected": -2.562577724456787, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.01756196079292253, |
| "grad_norm": 9.348048210144043, |
| "learning_rate": 2.2111111111111113e-06, |
| "logits/chosen": 4.450976848602295, |
| "logits/rejected": 4.4492387771606445, |
| "logps/chosen": -7.3747453689575195, |
| "logps/rejected": -8.266874313354492, |
| "loss": 2.0158, |
| "nll_loss": 1.3951395750045776, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -2.212423801422119, |
| "rewards/margins": 0.2676388621330261, |
| "rewards/rejected": -2.480062484741211, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.018440058832568655, |
| "grad_norm": 5.35267972946167, |
| "learning_rate": 2.3222222222222224e-06, |
| "logits/chosen": 4.387726783752441, |
| "logits/rejected": 4.407253265380859, |
| "logps/chosen": -6.376626491546631, |
| "logps/rejected": -7.535942077636719, |
| "loss": 1.7794, |
| "nll_loss": 1.1858270168304443, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.9129879474639893, |
| "rewards/margins": 0.3477945923805237, |
| "rewards/rejected": -2.2607827186584473, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.019318156872214783, |
| "grad_norm": 8.461145401000977, |
| "learning_rate": 2.4333333333333335e-06, |
| "logits/chosen": 4.57470703125, |
| "logits/rejected": 4.573002815246582, |
| "logps/chosen": -5.478797912597656, |
| "logps/rejected": -6.914730072021484, |
| "loss": 1.4017, |
| "nll_loss": 0.8511344194412231, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.643639326095581, |
| "rewards/margins": 0.4307795464992523, |
| "rewards/rejected": -2.0744190216064453, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.02019625491186091, |
| "grad_norm": 5.639391899108887, |
| "learning_rate": 2.5444444444444446e-06, |
| "logits/chosen": 4.498848915100098, |
| "logits/rejected": 4.52827262878418, |
| "logps/chosen": -4.859742164611816, |
| "logps/rejected": -5.578665733337402, |
| "loss": 1.4217, |
| "nll_loss": 0.754524827003479, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.4579226970672607, |
| "rewards/margins": 0.21567705273628235, |
| "rewards/rejected": -1.6735999584197998, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.021074352951507037, |
| "grad_norm": 5.855747699737549, |
| "learning_rate": 2.6555555555555556e-06, |
| "logits/chosen": 4.4441728591918945, |
| "logits/rejected": 4.421013832092285, |
| "logps/chosen": -3.958739757537842, |
| "logps/rejected": -4.544581413269043, |
| "loss": 1.3529, |
| "nll_loss": 0.6709738969802856, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -1.1876219511032104, |
| "rewards/margins": 0.17575237154960632, |
| "rewards/rejected": -1.3633743524551392, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.02195245099115316, |
| "grad_norm": 5.383121490478516, |
| "learning_rate": 2.766666666666667e-06, |
| "logits/chosen": 4.187775611877441, |
| "logits/rejected": 4.246646404266357, |
| "logps/chosen": -2.965056896209717, |
| "logps/rejected": -3.8153586387634277, |
| "loss": 1.0879, |
| "nll_loss": 0.45148134231567383, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8895170092582703, |
| "rewards/margins": 0.255090594291687, |
| "rewards/rejected": -1.144607663154602, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.02283054903079929, |
| "grad_norm": 4.042531967163086, |
| "learning_rate": 2.8777777777777782e-06, |
| "logits/chosen": 4.055316925048828, |
| "logits/rejected": 4.1006879806518555, |
| "logps/chosen": -2.189060688018799, |
| "logps/rejected": -2.7986176013946533, |
| "loss": 0.9665, |
| "nll_loss": 0.3139139711856842, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.6567181348800659, |
| "rewards/margins": 0.1828671246767044, |
| "rewards/rejected": -0.8395851850509644, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.023708647070445416, |
| "grad_norm": 3.6113719940185547, |
| "learning_rate": 2.988888888888889e-06, |
| "logits/chosen": 4.224070072174072, |
| "logits/rejected": 4.2020182609558105, |
| "logps/chosen": -1.5525212287902832, |
| "logps/rejected": -2.3060789108276367, |
| "loss": 0.7876, |
| "nll_loss": 0.17524096369743347, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.465756356716156, |
| "rewards/margins": 0.22606734931468964, |
| "rewards/rejected": -0.6918237805366516, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.02458674511009154, |
| "grad_norm": 2.20628023147583, |
| "learning_rate": 3.1000000000000004e-06, |
| "logits/chosen": 3.9985511302948, |
| "logits/rejected": 3.9572558403015137, |
| "logps/chosen": -1.0324242115020752, |
| "logps/rejected": -1.434692621231079, |
| "loss": 0.8117, |
| "nll_loss": 0.1578795313835144, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.309727281332016, |
| "rewards/margins": 0.12068048864603043, |
| "rewards/rejected": -0.4304077625274658, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.025464843149737668, |
| "grad_norm": 2.605509042739868, |
| "learning_rate": 3.2111111111111115e-06, |
| "logits/chosen": 3.9703261852264404, |
| "logits/rejected": 3.978794574737549, |
| "logps/chosen": -1.302750587463379, |
| "logps/rejected": -1.4635370969772339, |
| "loss": 0.8424, |
| "nll_loss": 0.15038228034973145, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.39082518219947815, |
| "rewards/margins": 0.048235934227705, |
| "rewards/rejected": -0.43906116485595703, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.026342941189383795, |
| "grad_norm": 4.225305080413818, |
| "learning_rate": 3.322222222222222e-06, |
| "logits/chosen": 3.872316360473633, |
| "logits/rejected": 3.909350872039795, |
| "logps/chosen": -0.7655197381973267, |
| "logps/rejected": -1.3384641408920288, |
| "loss": 0.7091, |
| "nll_loss": 0.07751598209142685, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.2296559065580368, |
| "rewards/margins": 0.17188331484794617, |
| "rewards/rejected": -0.40153923630714417, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.027221039229029922, |
| "grad_norm": 2.167863607406616, |
| "learning_rate": 3.4333333333333336e-06, |
| "logits/chosen": 3.8423914909362793, |
| "logits/rejected": 3.8470401763916016, |
| "logps/chosen": -0.781024158000946, |
| "logps/rejected": -1.1648194789886475, |
| "loss": 0.7324, |
| "nll_loss": 0.07609061896800995, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.23430728912353516, |
| "rewards/margins": 0.11513856798410416, |
| "rewards/rejected": -0.3494458794593811, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.028099137268676046, |
| "grad_norm": 2.662125587463379, |
| "learning_rate": 3.5444444444444447e-06, |
| "logits/chosen": 3.844832181930542, |
| "logits/rejected": 3.897855758666992, |
| "logps/chosen": -0.8712307214736938, |
| "logps/rejected": -1.2118942737579346, |
| "loss": 0.754, |
| "nll_loss": 0.09477487206459045, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.2613692581653595, |
| "rewards/margins": 0.10219905525445938, |
| "rewards/rejected": -0.3635682463645935, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.028977235308322174, |
| "grad_norm": 3.6492230892181396, |
| "learning_rate": 3.6555555555555562e-06, |
| "logits/chosen": 3.791374683380127, |
| "logits/rejected": 3.823376417160034, |
| "logps/chosen": -0.8908417820930481, |
| "logps/rejected": -1.1247615814208984, |
| "loss": 0.7571, |
| "nll_loss": 0.07855098694562912, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.26725253462791443, |
| "rewards/margins": 0.07017592340707779, |
| "rewards/rejected": -0.337428480386734, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0298553333479683, |
| "grad_norm": 4.195188999176025, |
| "learning_rate": 3.766666666666667e-06, |
| "logits/chosen": 3.7930731773376465, |
| "logits/rejected": 3.771570920944214, |
| "logps/chosen": -0.5707345008850098, |
| "logps/rejected": -1.0306470394134521, |
| "loss": 0.7241, |
| "nll_loss": 0.07002006471157074, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.1712203472852707, |
| "rewards/margins": 0.13797374069690704, |
| "rewards/rejected": -0.30919408798217773, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.03073343138761443, |
| "grad_norm": 6.730047225952148, |
| "learning_rate": 3.877777777777778e-06, |
| "logits/chosen": 3.7914137840270996, |
| "logits/rejected": 3.830873966217041, |
| "logps/chosen": -0.48989325761795044, |
| "logps/rejected": -1.329006552696228, |
| "loss": 0.6549, |
| "nll_loss": 0.04915159195661545, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.14696797728538513, |
| "rewards/margins": 0.2517339587211609, |
| "rewards/rejected": -0.3987019658088684, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.03161152942726055, |
| "grad_norm": 5.190159320831299, |
| "learning_rate": 3.9888888888888895e-06, |
| "logits/chosen": 3.5701937675476074, |
| "logits/rejected": 3.623018264770508, |
| "logps/chosen": -1.0031934976577759, |
| "logps/rejected": -1.3071503639221191, |
| "loss": 0.7632, |
| "nll_loss": 0.07658834755420685, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.3009580075740814, |
| "rewards/margins": 0.09118713438510895, |
| "rewards/rejected": -0.39214515686035156, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.03248962746690668, |
| "grad_norm": 6.694168567657471, |
| "learning_rate": 4.1e-06, |
| "logits/chosen": 3.823359727859497, |
| "logits/rejected": 3.852163314819336, |
| "logps/chosen": -0.6341744661331177, |
| "logps/rejected": -1.3019847869873047, |
| "loss": 0.7057, |
| "nll_loss": 0.07171504944562912, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.19025234878063202, |
| "rewards/margins": 0.20034310221672058, |
| "rewards/rejected": -0.3905954957008362, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.03336772550655281, |
| "grad_norm": 7.252128601074219, |
| "learning_rate": 4.211111111111112e-06, |
| "logits/chosen": 3.7956886291503906, |
| "logits/rejected": 3.795431137084961, |
| "logps/chosen": -0.7523115277290344, |
| "logps/rejected": -1.489225149154663, |
| "loss": 0.6923, |
| "nll_loss": 0.07404422760009766, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.2256934642791748, |
| "rewards/margins": 0.22107413411140442, |
| "rewards/rejected": -0.4467676281929016, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.034245823546198935, |
| "grad_norm": 2.945425271987915, |
| "learning_rate": 4.322222222222223e-06, |
| "logits/chosen": 3.5031909942626953, |
| "logits/rejected": 3.5364387035369873, |
| "logps/chosen": -0.6282280087471008, |
| "logps/rejected": -1.3442871570587158, |
| "loss": 0.7103, |
| "nll_loss": 0.06170845031738281, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.18846839666366577, |
| "rewards/margins": 0.21481776237487793, |
| "rewards/rejected": -0.4032861292362213, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.03512392158584506, |
| "grad_norm": 6.988142490386963, |
| "learning_rate": 4.433333333333334e-06, |
| "logits/chosen": 3.9105467796325684, |
| "logits/rejected": 3.931438446044922, |
| "logps/chosen": -0.9674631357192993, |
| "logps/rejected": -1.5149281024932861, |
| "loss": 0.7768, |
| "nll_loss": 0.11233203113079071, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.2902389466762543, |
| "rewards/margins": 0.1642395257949829, |
| "rewards/rejected": -0.45447850227355957, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.03600201962549118, |
| "grad_norm": 0.2611992359161377, |
| "learning_rate": 4.544444444444445e-06, |
| "logits/chosen": 3.6991469860076904, |
| "logits/rejected": 3.726545810699463, |
| "logps/chosen": -0.5881733894348145, |
| "logps/rejected": -1.2820765972137451, |
| "loss": 0.6929, |
| "nll_loss": 0.06608637422323227, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.17645201086997986, |
| "rewards/margins": 0.20817098021507263, |
| "rewards/rejected": -0.38462305068969727, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.03688011766513731, |
| "grad_norm": 3.019066095352173, |
| "learning_rate": 4.655555555555556e-06, |
| "logits/chosen": 3.4919254779815674, |
| "logits/rejected": 3.5151939392089844, |
| "logps/chosen": -0.5710722804069519, |
| "logps/rejected": -1.1787471771240234, |
| "loss": 0.712, |
| "nll_loss": 0.05282425880432129, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.17132170498371124, |
| "rewards/margins": 0.18230250477790833, |
| "rewards/rejected": -0.353624165058136, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.03775821570478344, |
| "grad_norm": 2.8214099407196045, |
| "learning_rate": 4.766666666666667e-06, |
| "logits/chosen": 3.7750792503356934, |
| "logits/rejected": 3.7456068992614746, |
| "logps/chosen": -0.7161394357681274, |
| "logps/rejected": -1.8331083059310913, |
| "loss": 0.6897, |
| "nll_loss": 0.09056379646062851, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.2148418426513672, |
| "rewards/margins": 0.335090696811676, |
| "rewards/rejected": -0.5499325394630432, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.038636313744429565, |
| "grad_norm": 5.72930383682251, |
| "learning_rate": 4.877777777777778e-06, |
| "logits/chosen": 3.5365283489227295, |
| "logits/rejected": 3.5479636192321777, |
| "logps/chosen": -0.7414464950561523, |
| "logps/rejected": -1.5791943073272705, |
| "loss": 0.6933, |
| "nll_loss": 0.0765593945980072, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.22243395447731018, |
| "rewards/margins": 0.2513243556022644, |
| "rewards/rejected": -0.4737583100795746, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.03951441178407569, |
| "grad_norm": 2.2949461936950684, |
| "learning_rate": 4.988888888888889e-06, |
| "logits/chosen": 3.2915852069854736, |
| "logits/rejected": 3.309730052947998, |
| "logps/chosen": -0.5450859069824219, |
| "logps/rejected": -0.948569118976593, |
| "loss": 0.7334, |
| "nll_loss": 0.0632125660777092, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.1635257601737976, |
| "rewards/margins": 0.12104494869709015, |
| "rewards/rejected": -0.28457072377204895, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.04039250982372182, |
| "grad_norm": 7.028234481811523, |
| "learning_rate": 5.1e-06, |
| "logits/chosen": 3.5347137451171875, |
| "logits/rejected": 3.542628049850464, |
| "logps/chosen": -0.6212174296379089, |
| "logps/rejected": -1.3661630153656006, |
| "loss": 0.7226, |
| "nll_loss": 0.08348599821329117, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.18636523187160492, |
| "rewards/margins": 0.2234836369752884, |
| "rewards/rejected": -0.4098488688468933, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.04127060786336795, |
| "grad_norm": 5.250203609466553, |
| "learning_rate": 5.211111111111111e-06, |
| "logits/chosen": 3.496631622314453, |
| "logits/rejected": 3.5331413745880127, |
| "logps/chosen": -0.6912875175476074, |
| "logps/rejected": -1.6311848163604736, |
| "loss": 0.6796, |
| "nll_loss": 0.062180064618587494, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.20738628506660461, |
| "rewards/margins": 0.2819691598415375, |
| "rewards/rejected": -0.4893553853034973, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.042148705903014075, |
| "grad_norm": 2.3006033897399902, |
| "learning_rate": 5.322222222222223e-06, |
| "logits/chosen": 3.731518268585205, |
| "logits/rejected": 3.775359630584717, |
| "logps/chosen": -0.8064204454421997, |
| "logps/rejected": -1.6423746347427368, |
| "loss": 0.7282, |
| "nll_loss": 0.09897418320178986, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.2419261485338211, |
| "rewards/margins": 0.25078627467155457, |
| "rewards/rejected": -0.49271243810653687, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.043026803942660195, |
| "grad_norm": 6.259355545043945, |
| "learning_rate": 5.4333333333333335e-06, |
| "logits/chosen": 3.3239219188690186, |
| "logits/rejected": 3.3287899494171143, |
| "logps/chosen": -0.623904824256897, |
| "logps/rejected": -1.5192670822143555, |
| "loss": 0.7155, |
| "nll_loss": 0.07001027464866638, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.18717142939567566, |
| "rewards/margins": 0.2686087191104889, |
| "rewards/rejected": -0.4557802081108093, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.04390490198230632, |
| "grad_norm": 4.574368000030518, |
| "learning_rate": 5.544444444444445e-06, |
| "logits/chosen": 3.667168378829956, |
| "logits/rejected": 3.707645893096924, |
| "logps/chosen": -0.7253153920173645, |
| "logps/rejected": -1.8622252941131592, |
| "loss": 0.6561, |
| "nll_loss": 0.06381665915250778, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.21759465336799622, |
| "rewards/margins": 0.34107303619384766, |
| "rewards/rejected": -0.5586676597595215, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.04478300002195245, |
| "grad_norm": 36.264381408691406, |
| "learning_rate": 5.6555555555555566e-06, |
| "logits/chosen": 3.398568630218506, |
| "logits/rejected": 3.468022108078003, |
| "logps/chosen": -0.5565214157104492, |
| "logps/rejected": -1.1638367176055908, |
| "loss": 0.7078, |
| "nll_loss": 0.06747711449861526, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.16695642471313477, |
| "rewards/margins": 0.18219462037086487, |
| "rewards/rejected": -0.34915101528167725, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.04566109806159858, |
| "grad_norm": 4.252997398376465, |
| "learning_rate": 5.766666666666667e-06, |
| "logits/chosen": 3.5346503257751465, |
| "logits/rejected": 3.526895046234131, |
| "logps/chosen": -0.9578359723091125, |
| "logps/rejected": -1.617163896560669, |
| "loss": 0.7567, |
| "nll_loss": 0.11074657738208771, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.2873508036136627, |
| "rewards/margins": 0.19779837131500244, |
| "rewards/rejected": -0.4851491451263428, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.046539196101244705, |
| "grad_norm": 10.661053657531738, |
| "learning_rate": 5.877777777777778e-06, |
| "logits/chosen": 3.5897374153137207, |
| "logits/rejected": 3.599020481109619, |
| "logps/chosen": -0.8656774759292603, |
| "logps/rejected": -1.2782808542251587, |
| "loss": 0.7624, |
| "nll_loss": 0.09282848984003067, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.25970324873924255, |
| "rewards/margins": 0.12378102540969849, |
| "rewards/rejected": -0.38348424434661865, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.04741729414089083, |
| "grad_norm": 2.3466947078704834, |
| "learning_rate": 5.98888888888889e-06, |
| "logits/chosen": 3.41766619682312, |
| "logits/rejected": 3.4891743659973145, |
| "logps/chosen": -0.69093918800354, |
| "logps/rejected": -2.0744166374206543, |
| "loss": 0.6747, |
| "nll_loss": 0.0662418007850647, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.20728178322315216, |
| "rewards/margins": 0.41504326462745667, |
| "rewards/rejected": -0.6223250031471252, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.04829539218053696, |
| "grad_norm": 4.315151214599609, |
| "learning_rate": 6.1e-06, |
| "logits/chosen": 3.4109902381896973, |
| "logits/rejected": 3.5040442943573, |
| "logps/chosen": -0.5460541248321533, |
| "logps/rejected": -1.9011294841766357, |
| "loss": 0.653, |
| "nll_loss": 0.06273610144853592, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.16381624341011047, |
| "rewards/margins": 0.40652260184288025, |
| "rewards/rejected": -0.5703388452529907, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.04917349022018308, |
| "grad_norm": 4.520711898803711, |
| "learning_rate": 6.211111111111111e-06, |
| "logits/chosen": 3.562473773956299, |
| "logits/rejected": 3.6386642456054688, |
| "logps/chosen": -0.7312324047088623, |
| "logps/rejected": -1.6271352767944336, |
| "loss": 0.6966, |
| "nll_loss": 0.06128234788775444, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.21936972439289093, |
| "rewards/margins": 0.26877090334892273, |
| "rewards/rejected": -0.48814067244529724, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.05005158825982921, |
| "grad_norm": 7.277171611785889, |
| "learning_rate": 6.322222222222223e-06, |
| "logits/chosen": 3.472989559173584, |
| "logits/rejected": 3.443589687347412, |
| "logps/chosen": -0.7062110900878906, |
| "logps/rejected": -2.1304116249084473, |
| "loss": 0.6869, |
| "nll_loss": 0.09942348301410675, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.21186332404613495, |
| "rewards/margins": 0.42726022005081177, |
| "rewards/rejected": -0.6391235589981079, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.050929686299475335, |
| "grad_norm": 7.543278217315674, |
| "learning_rate": 6.433333333333333e-06, |
| "logits/chosen": 3.464301347732544, |
| "logits/rejected": 3.444230556488037, |
| "logps/chosen": -0.7348255515098572, |
| "logps/rejected": -1.602164626121521, |
| "loss": 0.7316, |
| "nll_loss": 0.08772562444210052, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.22044768929481506, |
| "rewards/margins": 0.26020172238349915, |
| "rewards/rejected": -0.4806493818759918, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.05180778433912146, |
| "grad_norm": 3.7779767513275146, |
| "learning_rate": 6.544444444444445e-06, |
| "logits/chosen": 3.633018970489502, |
| "logits/rejected": 3.709826946258545, |
| "logps/chosen": -0.9298914074897766, |
| "logps/rejected": -1.606702446937561, |
| "loss": 0.7953, |
| "nll_loss": 0.1196284145116806, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.2789674401283264, |
| "rewards/margins": 0.20304329693317413, |
| "rewards/rejected": -0.48201069235801697, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.05268588237876759, |
| "grad_norm": 7.008880615234375, |
| "learning_rate": 6.655555555555556e-06, |
| "logits/chosen": 3.381080150604248, |
| "logits/rejected": 3.444829225540161, |
| "logps/chosen": -0.7211336493492126, |
| "logps/rejected": -1.423513650894165, |
| "loss": 0.7429, |
| "nll_loss": 0.06740613281726837, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.216340109705925, |
| "rewards/margins": 0.21071402728557587, |
| "rewards/rejected": -0.42705410718917847, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.05356398041841372, |
| "grad_norm": 5.524548530578613, |
| "learning_rate": 6.7666666666666665e-06, |
| "logits/chosen": 3.4077486991882324, |
| "logits/rejected": 3.377532958984375, |
| "logps/chosen": -0.5095429420471191, |
| "logps/rejected": -1.0147814750671387, |
| "loss": 0.7157, |
| "nll_loss": 0.0589555986225605, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.15286290645599365, |
| "rewards/margins": 0.1515716016292572, |
| "rewards/rejected": -0.30443447828292847, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.054442078458059845, |
| "grad_norm": 4.042827606201172, |
| "learning_rate": 6.8777777777777785e-06, |
| "logits/chosen": 3.716031551361084, |
| "logits/rejected": 3.713074207305908, |
| "logps/chosen": -0.8152651786804199, |
| "logps/rejected": -1.5531432628631592, |
| "loss": 0.7318, |
| "nll_loss": 0.08608300983905792, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.24457958340644836, |
| "rewards/margins": 0.22136345505714417, |
| "rewards/rejected": -0.46594300866127014, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.05532017649770597, |
| "grad_norm": 4.254418849945068, |
| "learning_rate": 6.9888888888888895e-06, |
| "logits/chosen": 3.4341864585876465, |
| "logits/rejected": 3.458519458770752, |
| "logps/chosen": -0.6925168037414551, |
| "logps/rejected": -1.5713815689086914, |
| "loss": 0.6898, |
| "nll_loss": 0.07025544345378876, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.20775504410266876, |
| "rewards/margins": 0.26365941762924194, |
| "rewards/rejected": -0.4714145064353943, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.05619827453735209, |
| "grad_norm": 2.973627805709839, |
| "learning_rate": 7.100000000000001e-06, |
| "logits/chosen": 3.4850242137908936, |
| "logits/rejected": 3.5192267894744873, |
| "logps/chosen": -1.1339752674102783, |
| "logps/rejected": -1.5586907863616943, |
| "loss": 0.8059, |
| "nll_loss": 0.10102218389511108, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.34019264578819275, |
| "rewards/margins": 0.1274145543575287, |
| "rewards/rejected": -0.46760720014572144, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.05707637257699822, |
| "grad_norm": 3.3906142711639404, |
| "learning_rate": 7.211111111111112e-06, |
| "logits/chosen": 3.6287121772766113, |
| "logits/rejected": 3.5881965160369873, |
| "logps/chosen": -0.7608178853988647, |
| "logps/rejected": -1.2269501686096191, |
| "loss": 0.7394, |
| "nll_loss": 0.0750693827867508, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.228245347738266, |
| "rewards/margins": 0.13983972370624542, |
| "rewards/rejected": -0.3680850863456726, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.05795447061664435, |
| "grad_norm": 0.03068475052714348, |
| "learning_rate": 7.322222222222223e-06, |
| "logits/chosen": 3.743140697479248, |
| "logits/rejected": 3.7635676860809326, |
| "logps/chosen": -0.5632290840148926, |
| "logps/rejected": -1.5820039510726929, |
| "loss": 0.6807, |
| "nll_loss": 0.07005371153354645, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.16896870732307434, |
| "rewards/margins": 0.30563241243362427, |
| "rewards/rejected": -0.47460120916366577, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.058832568656290475, |
| "grad_norm": 2.2352776527404785, |
| "learning_rate": 7.433333333333334e-06, |
| "logits/chosen": 3.5528149604797363, |
| "logits/rejected": 3.5446677207946777, |
| "logps/chosen": -0.5461568832397461, |
| "logps/rejected": -1.2885067462921143, |
| "loss": 0.6956, |
| "nll_loss": 0.06532245129346848, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.16384705901145935, |
| "rewards/margins": 0.22270497679710388, |
| "rewards/rejected": -0.3865520656108856, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.0597106666959366, |
| "grad_norm": 2.6797573566436768, |
| "learning_rate": 7.544444444444445e-06, |
| "logits/chosen": 3.3902244567871094, |
| "logits/rejected": 3.4180169105529785, |
| "logps/chosen": -0.6432263255119324, |
| "logps/rejected": -1.1323941946029663, |
| "loss": 0.7574, |
| "nll_loss": 0.07451333105564117, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.19296793639659882, |
| "rewards/margins": 0.1467503160238266, |
| "rewards/rejected": -0.3397182822227478, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.06058876473558273, |
| "grad_norm": 5.877313137054443, |
| "learning_rate": 7.655555555555556e-06, |
| "logits/chosen": 3.4120171070098877, |
| "logits/rejected": 3.4442646503448486, |
| "logps/chosen": -0.7911199331283569, |
| "logps/rejected": -1.4578664302825928, |
| "loss": 0.7751, |
| "nll_loss": 0.1019410640001297, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.23733600974082947, |
| "rewards/margins": 0.20002400875091553, |
| "rewards/rejected": -0.4373599886894226, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.06146686277522886, |
| "grad_norm": 3.2915701866149902, |
| "learning_rate": 7.766666666666666e-06, |
| "logits/chosen": 3.5103302001953125, |
| "logits/rejected": 3.4995181560516357, |
| "logps/chosen": -0.6160825490951538, |
| "logps/rejected": -1.1019346714019775, |
| "loss": 0.7363, |
| "nll_loss": 0.06433330476284027, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.18482479453086853, |
| "rewards/margins": 0.14575564861297607, |
| "rewards/rejected": -0.3305804133415222, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.06234496081487498, |
| "grad_norm": 3.7106151580810547, |
| "learning_rate": 7.877777777777778e-06, |
| "logits/chosen": 3.474386692047119, |
| "logits/rejected": 3.4553630352020264, |
| "logps/chosen": -0.6972242593765259, |
| "logps/rejected": -1.5925921201705933, |
| "loss": 0.7109, |
| "nll_loss": 0.07773466408252716, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.20916728675365448, |
| "rewards/margins": 0.2686103284358978, |
| "rewards/rejected": -0.4777776300907135, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.0632230588545211, |
| "grad_norm": 1.7685184478759766, |
| "learning_rate": 7.98888888888889e-06, |
| "logits/chosen": 3.6326797008514404, |
| "logits/rejected": 3.625549793243408, |
| "logps/chosen": -0.5006519556045532, |
| "logps/rejected": -1.8575595617294312, |
| "loss": 0.6416, |
| "nll_loss": 0.05640099197626114, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.15019558370113373, |
| "rewards/margins": 0.4070723056793213, |
| "rewards/rejected": -0.5572679042816162, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.06410115689416723, |
| "grad_norm": 4.595531463623047, |
| "learning_rate": 8.1e-06, |
| "logits/chosen": 3.6036553382873535, |
| "logits/rejected": 3.6813888549804688, |
| "logps/chosen": -1.0952359437942505, |
| "logps/rejected": -2.0565478801727295, |
| "loss": 0.7318, |
| "nll_loss": 0.06606093794107437, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.32857078313827515, |
| "rewards/margins": 0.28839364647865295, |
| "rewards/rejected": -0.6169643998146057, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.06497925493381336, |
| "grad_norm": 2.7277488708496094, |
| "learning_rate": 8.211111111111112e-06, |
| "logits/chosen": 3.4828929901123047, |
| "logits/rejected": 3.536961317062378, |
| "logps/chosen": -0.6164706945419312, |
| "logps/rejected": -2.1122829914093018, |
| "loss": 0.6062, |
| "nll_loss": 0.040962688624858856, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.18494121730327606, |
| "rewards/margins": 0.44874364137649536, |
| "rewards/rejected": -0.6336848735809326, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.06585735297345949, |
| "grad_norm": 3.6677157878875732, |
| "learning_rate": 8.322222222222223e-06, |
| "logits/chosen": 3.361325740814209, |
| "logits/rejected": 3.362967014312744, |
| "logps/chosen": -0.9366201162338257, |
| "logps/rejected": -1.7140756845474243, |
| "loss": 0.7507, |
| "nll_loss": 0.10440067946910858, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.28098607063293457, |
| "rewards/margins": 0.23323671519756317, |
| "rewards/rejected": -0.5142227411270142, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.06673545101310562, |
| "grad_norm": 3.729750633239746, |
| "learning_rate": 8.433333333333334e-06, |
| "logits/chosen": 3.343749523162842, |
| "logits/rejected": 3.401230573654175, |
| "logps/chosen": -0.7000004649162292, |
| "logps/rejected": -1.9269546270370483, |
| "loss": 0.6931, |
| "nll_loss": 0.07972760498523712, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.2100001573562622, |
| "rewards/margins": 0.3680862486362457, |
| "rewards/rejected": -0.5780864357948303, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.06761354905275174, |
| "grad_norm": 7.166464328765869, |
| "learning_rate": 8.544444444444445e-06, |
| "logits/chosen": 3.4859509468078613, |
| "logits/rejected": 3.587602138519287, |
| "logps/chosen": -0.7638369798660278, |
| "logps/rejected": -2.2576065063476562, |
| "loss": 0.6832, |
| "nll_loss": 0.0738845020532608, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.22915109992027283, |
| "rewards/margins": 0.4481307864189148, |
| "rewards/rejected": -0.6772819757461548, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.06849164709239787, |
| "grad_norm": 2.416330575942993, |
| "learning_rate": 8.655555555555557e-06, |
| "logits/chosen": 3.408698558807373, |
| "logits/rejected": 3.4245800971984863, |
| "logps/chosen": -0.8418534398078918, |
| "logps/rejected": -1.4894107580184937, |
| "loss": 0.757, |
| "nll_loss": 0.0903228372335434, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.25255605578422546, |
| "rewards/margins": 0.1942671835422516, |
| "rewards/rejected": -0.44682326912879944, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.069369745132044, |
| "grad_norm": 4.281314373016357, |
| "learning_rate": 8.766666666666669e-06, |
| "logits/chosen": 3.2489428520202637, |
| "logits/rejected": 3.2514851093292236, |
| "logps/chosen": -0.8190711736679077, |
| "logps/rejected": -1.3487728834152222, |
| "loss": 0.7399, |
| "nll_loss": 0.07894166558980942, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.24572138488292694, |
| "rewards/margins": 0.15891052782535553, |
| "rewards/rejected": -0.40463191270828247, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.07024784317169012, |
| "grad_norm": 1.3228946924209595, |
| "learning_rate": 8.877777777777779e-06, |
| "logits/chosen": 3.2964024543762207, |
| "logits/rejected": 3.3055152893066406, |
| "logps/chosen": -0.8143989443778992, |
| "logps/rejected": -1.2753360271453857, |
| "loss": 0.75, |
| "nll_loss": 0.07704529166221619, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.24431967735290527, |
| "rewards/margins": 0.13828110694885254, |
| "rewards/rejected": -0.3826007843017578, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.07112594121133625, |
| "grad_norm": 3.7781331539154053, |
| "learning_rate": 8.988888888888889e-06, |
| "logits/chosen": 3.68397855758667, |
| "logits/rejected": 3.6694297790527344, |
| "logps/chosen": -0.9365280866622925, |
| "logps/rejected": -1.7311958074569702, |
| "loss": 0.7448, |
| "nll_loss": 0.08974708616733551, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.28095847368240356, |
| "rewards/margins": 0.23840029537677765, |
| "rewards/rejected": -0.51935875415802, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.07200403925098237, |
| "grad_norm": 2.7228267192840576, |
| "learning_rate": 9.100000000000001e-06, |
| "logits/chosen": 3.6748175621032715, |
| "logits/rejected": 3.7467494010925293, |
| "logps/chosen": -0.5383256077766418, |
| "logps/rejected": -2.2832655906677246, |
| "loss": 0.6081, |
| "nll_loss": 0.06415946036577225, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.16149768233299255, |
| "rewards/margins": 0.523482084274292, |
| "rewards/rejected": -0.6849797964096069, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0728821372906285, |
| "grad_norm": 1.3846280574798584, |
| "learning_rate": 9.211111111111111e-06, |
| "logits/chosen": 3.2965283393859863, |
| "logits/rejected": 3.329348087310791, |
| "logps/chosen": -0.5321189761161804, |
| "logps/rejected": -2.2239809036254883, |
| "loss": 0.6186, |
| "nll_loss": 0.04651743918657303, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.15963570773601532, |
| "rewards/margins": 0.5075585842132568, |
| "rewards/rejected": -0.6671942472457886, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.07376023533027462, |
| "grad_norm": 4.5536675453186035, |
| "learning_rate": 9.322222222222223e-06, |
| "logits/chosen": 3.347224473953247, |
| "logits/rejected": 3.3436226844787598, |
| "logps/chosen": -0.7508156895637512, |
| "logps/rejected": -1.867352843284607, |
| "loss": 0.707, |
| "nll_loss": 0.0711875781416893, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.2252446860074997, |
| "rewards/margins": 0.3349612355232239, |
| "rewards/rejected": -0.56020587682724, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.07463833336992075, |
| "grad_norm": 4.691596508026123, |
| "learning_rate": 9.433333333333335e-06, |
| "logits/chosen": 3.253293991088867, |
| "logits/rejected": 3.2947421073913574, |
| "logps/chosen": -0.9249482154846191, |
| "logps/rejected": -1.812909483909607, |
| "loss": 0.7568, |
| "nll_loss": 0.09252104163169861, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": -0.2774844765663147, |
| "rewards/margins": 0.2663884162902832, |
| "rewards/rejected": -0.5438728928565979, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.07551643140956688, |
| "grad_norm": 4.119868278503418, |
| "learning_rate": 9.544444444444445e-06, |
| "logits/chosen": 3.3257553577423096, |
| "logits/rejected": 3.3328521251678467, |
| "logps/chosen": -0.8130607604980469, |
| "logps/rejected": -2.2179336547851562, |
| "loss": 0.7061, |
| "nll_loss": 0.09296734631061554, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.2439182549715042, |
| "rewards/margins": 0.42146188020706177, |
| "rewards/rejected": -0.6653801202774048, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.076394529449213, |
| "grad_norm": 4.941491603851318, |
| "learning_rate": 9.655555555555556e-06, |
| "logits/chosen": 3.2392685413360596, |
| "logits/rejected": 3.2623963356018066, |
| "logps/chosen": -0.7361981272697449, |
| "logps/rejected": -1.6700445413589478, |
| "loss": 0.7312, |
| "nll_loss": 0.08274148404598236, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.22085945308208466, |
| "rewards/margins": 0.28015393018722534, |
| "rewards/rejected": -0.5010133385658264, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.07727262748885913, |
| "grad_norm": 1.4032851457595825, |
| "learning_rate": 9.766666666666667e-06, |
| "logits/chosen": 3.2956814765930176, |
| "logits/rejected": 3.318169355392456, |
| "logps/chosen": -0.7056166529655457, |
| "logps/rejected": -1.8036190271377563, |
| "loss": 0.7136, |
| "nll_loss": 0.09712977707386017, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.21168498694896698, |
| "rewards/margins": 0.3294007182121277, |
| "rewards/rejected": -0.5410857200622559, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.07815072552850526, |
| "grad_norm": 3.9460878372192383, |
| "learning_rate": 9.877777777777778e-06, |
| "logits/chosen": 3.4804458618164062, |
| "logits/rejected": 3.5131962299346924, |
| "logps/chosen": -0.9113373756408691, |
| "logps/rejected": -1.8960120677947998, |
| "loss": 0.7272, |
| "nll_loss": 0.08563139289617538, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": -0.2734012007713318, |
| "rewards/margins": 0.295402467250824, |
| "rewards/rejected": -0.5688036680221558, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.07902882356815139, |
| "grad_norm": 5.001852989196777, |
| "learning_rate": 9.98888888888889e-06, |
| "logits/chosen": 3.5693771839141846, |
| "logits/rejected": 3.613219738006592, |
| "logps/chosen": -0.7482819557189941, |
| "logps/rejected": -1.611090898513794, |
| "loss": 0.7488, |
| "nll_loss": 0.10075131803750992, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.2244846075773239, |
| "rewards/margins": 0.25884273648262024, |
| "rewards/rejected": -0.48332732915878296, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.07990692160779751, |
| "grad_norm": 3.9788780212402344, |
| "learning_rate": 9.98888888888889e-06, |
| "logits/chosen": 3.418684720993042, |
| "logits/rejected": 3.4473800659179688, |
| "logps/chosen": -0.5167075991630554, |
| "logps/rejected": -1.0832570791244507, |
| "loss": 0.7187, |
| "nll_loss": 0.053985703736543655, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.155012309551239, |
| "rewards/margins": 0.16996484994888306, |
| "rewards/rejected": -0.3249771296977997, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.08078501964744364, |
| "grad_norm": 1.5054136514663696, |
| "learning_rate": 9.976543209876544e-06, |
| "logits/chosen": 3.389498233795166, |
| "logits/rejected": 3.4252688884735107, |
| "logps/chosen": -0.6502631902694702, |
| "logps/rejected": -1.8272225856781006, |
| "loss": 0.6756, |
| "nll_loss": 0.07306591421365738, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.1950789839029312, |
| "rewards/margins": 0.35308781266212463, |
| "rewards/rejected": -0.5481668710708618, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.08166311768708977, |
| "grad_norm": 2.810540199279785, |
| "learning_rate": 9.964197530864198e-06, |
| "logits/chosen": 3.4912326335906982, |
| "logits/rejected": 3.503628969192505, |
| "logps/chosen": -0.4884684681892395, |
| "logps/rejected": -1.4588502645492554, |
| "loss": 0.6613, |
| "nll_loss": 0.04402286559343338, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.14654052257537842, |
| "rewards/margins": 0.29111456871032715, |
| "rewards/rejected": -0.4376550614833832, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.0825412157267359, |
| "grad_norm": 5.386466979980469, |
| "learning_rate": 9.951851851851853e-06, |
| "logits/chosen": 3.386685848236084, |
| "logits/rejected": 3.4002914428710938, |
| "logps/chosen": -0.5482162237167358, |
| "logps/rejected": -1.572486162185669, |
| "loss": 0.6616, |
| "nll_loss": 0.053943734616041183, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.16446486115455627, |
| "rewards/margins": 0.307280957698822, |
| "rewards/rejected": -0.4717458188533783, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.08341931376638202, |
| "grad_norm": 1.676483154296875, |
| "learning_rate": 9.939506172839507e-06, |
| "logits/chosen": 3.3065590858459473, |
| "logits/rejected": 3.3416500091552734, |
| "logps/chosen": -0.5273550748825073, |
| "logps/rejected": -1.707932472229004, |
| "loss": 0.6729, |
| "nll_loss": 0.06074246019124985, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.1582065373659134, |
| "rewards/margins": 0.3541732430458069, |
| "rewards/rejected": -0.5123798251152039, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.08429741180602815, |
| "grad_norm": 2.689913272857666, |
| "learning_rate": 9.927160493827162e-06, |
| "logits/chosen": 3.2740864753723145, |
| "logits/rejected": 3.335360050201416, |
| "logps/chosen": -0.7466616630554199, |
| "logps/rejected": -2.0391454696655273, |
| "loss": 0.7055, |
| "nll_loss": 0.09172563254833221, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.2239985167980194, |
| "rewards/margins": 0.3877451419830322, |
| "rewards/rejected": -0.6117436289787292, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.08517550984567426, |
| "grad_norm": 3.589853286743164, |
| "learning_rate": 9.914814814814816e-06, |
| "logits/chosen": 3.0097994804382324, |
| "logits/rejected": 3.0853917598724365, |
| "logps/chosen": -0.5722948312759399, |
| "logps/rejected": -1.9204362630844116, |
| "loss": 0.6501, |
| "nll_loss": 0.051983099430799484, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.17168846726417542, |
| "rewards/margins": 0.4044424593448639, |
| "rewards/rejected": -0.5761309266090393, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.08605360788532039, |
| "grad_norm": 1.7398462295532227, |
| "learning_rate": 9.90246913580247e-06, |
| "logits/chosen": 3.3110098838806152, |
| "logits/rejected": 3.4121768474578857, |
| "logps/chosen": -0.6699460744857788, |
| "logps/rejected": -2.0870394706726074, |
| "loss": 0.6724, |
| "nll_loss": 0.053984154015779495, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.20098385214805603, |
| "rewards/margins": 0.4251279830932617, |
| "rewards/rejected": -0.6261118054389954, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.08693170592496652, |
| "grad_norm": 5.3601861000061035, |
| "learning_rate": 9.890123456790123e-06, |
| "logits/chosen": 3.1401820182800293, |
| "logits/rejected": 3.127436399459839, |
| "logps/chosen": -0.7694223523139954, |
| "logps/rejected": -1.6481168270111084, |
| "loss": 0.737, |
| "nll_loss": 0.09365083277225494, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.230826735496521, |
| "rewards/margins": 0.2636083662509918, |
| "rewards/rejected": -0.49443507194519043, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.08780980396461265, |
| "grad_norm": 2.6405630111694336, |
| "learning_rate": 9.877777777777778e-06, |
| "logits/chosen": 3.1894805431365967, |
| "logits/rejected": 3.2360007762908936, |
| "logps/chosen": -0.5739088654518127, |
| "logps/rejected": -2.0212159156799316, |
| "loss": 0.6616, |
| "nll_loss": 0.05945644527673721, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.1721726506948471, |
| "rewards/margins": 0.4341921806335449, |
| "rewards/rejected": -0.606364905834198, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.08868790200425877, |
| "grad_norm": 2.2846076488494873, |
| "learning_rate": 9.865432098765432e-06, |
| "logits/chosen": 3.2782859802246094, |
| "logits/rejected": 3.2814033031463623, |
| "logps/chosen": -0.5302027463912964, |
| "logps/rejected": -1.8145701885223389, |
| "loss": 0.6543, |
| "nll_loss": 0.05663750320672989, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.15906082093715668, |
| "rewards/margins": 0.3853102922439575, |
| "rewards/rejected": -0.5443711280822754, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.0895660000439049, |
| "grad_norm": 13.04986572265625, |
| "learning_rate": 9.853086419753087e-06, |
| "logits/chosen": 2.8904287815093994, |
| "logits/rejected": 2.907032012939453, |
| "logps/chosen": -1.1254570484161377, |
| "logps/rejected": -2.3068795204162598, |
| "loss": 0.7832, |
| "nll_loss": 0.1419171392917633, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.33763712644577026, |
| "rewards/margins": 0.354426771402359, |
| "rewards/rejected": -0.6920639276504517, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.09044409808355103, |
| "grad_norm": 2.556560754776001, |
| "learning_rate": 9.840740740740743e-06, |
| "logits/chosen": 3.151669502258301, |
| "logits/rejected": 3.1690382957458496, |
| "logps/chosen": -0.7756383419036865, |
| "logps/rejected": -1.6224708557128906, |
| "loss": 0.7551, |
| "nll_loss": 0.09495635330677032, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.23269149661064148, |
| "rewards/margins": 0.25404977798461914, |
| "rewards/rejected": -0.486741304397583, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.09132219612319716, |
| "grad_norm": 1.3183997869491577, |
| "learning_rate": 9.828395061728397e-06, |
| "logits/chosen": 3.195861339569092, |
| "logits/rejected": 3.2810165882110596, |
| "logps/chosen": -0.5329464077949524, |
| "logps/rejected": -1.2915513515472412, |
| "loss": 0.7152, |
| "nll_loss": 0.054315369576215744, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.15988394618034363, |
| "rewards/margins": 0.22758150100708008, |
| "rewards/rejected": -0.3874654471874237, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.09220029416284328, |
| "grad_norm": 1.674479603767395, |
| "learning_rate": 9.81604938271605e-06, |
| "logits/chosen": 3.139688491821289, |
| "logits/rejected": 3.2257683277130127, |
| "logps/chosen": -0.7113819122314453, |
| "logps/rejected": -2.1473631858825684, |
| "loss": 0.679, |
| "nll_loss": 0.04762103408575058, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.21341457962989807, |
| "rewards/margins": 0.4307943284511566, |
| "rewards/rejected": -0.6442088484764099, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.09307839220248941, |
| "grad_norm": 1.9272900819778442, |
| "learning_rate": 9.803703703703704e-06, |
| "logits/chosen": 3.403465986251831, |
| "logits/rejected": 3.385577440261841, |
| "logps/chosen": -0.9791383743286133, |
| "logps/rejected": -2.0091757774353027, |
| "loss": 0.7499, |
| "nll_loss": 0.11029829829931259, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.29374146461486816, |
| "rewards/margins": 0.309011310338974, |
| "rewards/rejected": -0.6027528047561646, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.09395649024213554, |
| "grad_norm": 3.530672073364258, |
| "learning_rate": 9.791358024691359e-06, |
| "logits/chosen": 2.975001573562622, |
| "logits/rejected": 3.063398838043213, |
| "logps/chosen": -0.9404687881469727, |
| "logps/rejected": -1.803180456161499, |
| "loss": 0.7547, |
| "nll_loss": 0.079728864133358, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.28214067220687866, |
| "rewards/margins": 0.2588135600090027, |
| "rewards/rejected": -0.5409542322158813, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.09483458828178166, |
| "grad_norm": 7.528195381164551, |
| "learning_rate": 9.779012345679013e-06, |
| "logits/chosen": 3.1206297874450684, |
| "logits/rejected": 3.173870086669922, |
| "logps/chosen": -0.6394155621528625, |
| "logps/rejected": -1.1264150142669678, |
| "loss": 0.7336, |
| "nll_loss": 0.06591827422380447, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.19182467460632324, |
| "rewards/margins": 0.14609983563423157, |
| "rewards/rejected": -0.3379245400428772, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.09571268632142779, |
| "grad_norm": 2.3711354732513428, |
| "learning_rate": 9.766666666666667e-06, |
| "logits/chosen": 3.2346444129943848, |
| "logits/rejected": 3.3024227619171143, |
| "logps/chosen": -0.7532138824462891, |
| "logps/rejected": -1.9977819919586182, |
| "loss": 0.6926, |
| "nll_loss": 0.0733107179403305, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.22596418857574463, |
| "rewards/margins": 0.3733704090118408, |
| "rewards/rejected": -0.5993345975875854, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.09659078436107392, |
| "grad_norm": 2.3100759983062744, |
| "learning_rate": 9.754320987654322e-06, |
| "logits/chosen": 3.0819344520568848, |
| "logits/rejected": 3.081664562225342, |
| "logps/chosen": -0.39054492115974426, |
| "logps/rejected": -1.3339643478393555, |
| "loss": 0.6571, |
| "nll_loss": 0.03647618740797043, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.11716349422931671, |
| "rewards/margins": 0.2830258309841156, |
| "rewards/rejected": -0.4001893401145935, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.09746888240072005, |
| "grad_norm": 4.387854099273682, |
| "learning_rate": 9.741975308641976e-06, |
| "logits/chosen": 3.0971732139587402, |
| "logits/rejected": 3.105783224105835, |
| "logps/chosen": -0.6461865305900574, |
| "logps/rejected": -1.5687153339385986, |
| "loss": 0.7209, |
| "nll_loss": 0.08219017088413239, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.19385597109794617, |
| "rewards/margins": 0.2767586410045624, |
| "rewards/rejected": -0.47061461210250854, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.09834698044036616, |
| "grad_norm": 1.8574669361114502, |
| "learning_rate": 9.72962962962963e-06, |
| "logits/chosen": 3.2028121948242188, |
| "logits/rejected": 3.188230037689209, |
| "logps/chosen": -0.6974012851715088, |
| "logps/rejected": -2.043318033218384, |
| "loss": 0.6628, |
| "nll_loss": 0.06341539323329926, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.20922040939331055, |
| "rewards/margins": 0.40377503633499146, |
| "rewards/rejected": -0.612995445728302, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.09922507848001229, |
| "grad_norm": 1.4223850965499878, |
| "learning_rate": 9.717283950617285e-06, |
| "logits/chosen": 3.1057040691375732, |
| "logits/rejected": 3.1665711402893066, |
| "logps/chosen": -0.4518910348415375, |
| "logps/rejected": -1.782041311264038, |
| "loss": 0.6529, |
| "nll_loss": 0.04685738682746887, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.1355672925710678, |
| "rewards/margins": 0.39904507994651794, |
| "rewards/rejected": -0.5346124172210693, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.10010317651965842, |
| "grad_norm": 0.6512376666069031, |
| "learning_rate": 9.70493827160494e-06, |
| "logits/chosen": 3.062418222427368, |
| "logits/rejected": 3.1085588932037354, |
| "logps/chosen": -0.7759238481521606, |
| "logps/rejected": -2.237229585647583, |
| "loss": 0.6829, |
| "nll_loss": 0.08105801045894623, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.2327771931886673, |
| "rewards/margins": 0.43839168548583984, |
| "rewards/rejected": -0.6711689233779907, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.10098127455930454, |
| "grad_norm": 2.3270082473754883, |
| "learning_rate": 9.692592592592594e-06, |
| "logits/chosen": 2.8819382190704346, |
| "logits/rejected": 2.9401650428771973, |
| "logps/chosen": -0.5062090754508972, |
| "logps/rejected": -1.868971824645996, |
| "loss": 0.6794, |
| "nll_loss": 0.05651511624455452, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.1518627107143402, |
| "rewards/margins": 0.40882882475852966, |
| "rewards/rejected": -0.5606915354728699, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.10185937259895067, |
| "grad_norm": 1.2098430395126343, |
| "learning_rate": 9.680246913580248e-06, |
| "logits/chosen": 2.7454497814178467, |
| "logits/rejected": 2.780897617340088, |
| "logps/chosen": -0.9837905168533325, |
| "logps/rejected": -2.0501391887664795, |
| "loss": 0.7538, |
| "nll_loss": 0.10150198638439178, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.2951371669769287, |
| "rewards/margins": 0.319904625415802, |
| "rewards/rejected": -0.6150418519973755, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.1027374706385968, |
| "grad_norm": 2.469829559326172, |
| "learning_rate": 9.667901234567903e-06, |
| "logits/chosen": 2.8060965538024902, |
| "logits/rejected": 2.8710038661956787, |
| "logps/chosen": -0.6813799738883972, |
| "logps/rejected": -2.1527724266052246, |
| "loss": 0.6474, |
| "nll_loss": 0.06949031352996826, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.2044139802455902, |
| "rewards/margins": 0.44141775369644165, |
| "rewards/rejected": -0.6458317637443542, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.10361556867824293, |
| "grad_norm": 3.5877394676208496, |
| "learning_rate": 9.655555555555556e-06, |
| "logits/chosen": 3.1048672199249268, |
| "logits/rejected": 3.1411147117614746, |
| "logps/chosen": -0.4382111132144928, |
| "logps/rejected": -2.12353253364563, |
| "loss": 0.624, |
| "nll_loss": 0.048050910234451294, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.13146333396434784, |
| "rewards/margins": 0.5055964589118958, |
| "rewards/rejected": -0.6370598077774048, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.10449366671788905, |
| "grad_norm": 13.380763053894043, |
| "learning_rate": 9.64320987654321e-06, |
| "logits/chosen": 2.6727805137634277, |
| "logits/rejected": 2.7236101627349854, |
| "logps/chosen": -0.7069253325462341, |
| "logps/rejected": -2.2335355281829834, |
| "loss": 0.6386, |
| "nll_loss": 0.05090578272938728, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.21207761764526367, |
| "rewards/margins": 0.45798301696777344, |
| "rewards/rejected": -0.6700606346130371, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.10537176475753518, |
| "grad_norm": 7.120882034301758, |
| "learning_rate": 9.630864197530864e-06, |
| "logits/chosen": 2.8762526512145996, |
| "logits/rejected": 2.901745319366455, |
| "logps/chosen": -0.7335812449455261, |
| "logps/rejected": -2.8476223945617676, |
| "loss": 0.6497, |
| "nll_loss": 0.07983629405498505, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.2200743705034256, |
| "rewards/margins": 0.6342123746871948, |
| "rewards/rejected": -0.854286789894104, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.10624986279718131, |
| "grad_norm": 5.2290263175964355, |
| "learning_rate": 9.618518518518519e-06, |
| "logits/chosen": 3.0229249000549316, |
| "logits/rejected": 2.959900379180908, |
| "logps/chosen": -1.3016353845596313, |
| "logps/rejected": -1.7729085683822632, |
| "loss": 0.8886, |
| "nll_loss": 0.14201593399047852, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.3904905915260315, |
| "rewards/margins": 0.14138197898864746, |
| "rewards/rejected": -0.5318726301193237, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.10712796083682743, |
| "grad_norm": 1.350420355796814, |
| "learning_rate": 9.606172839506173e-06, |
| "logits/chosen": 2.695782423019409, |
| "logits/rejected": 2.6982076168060303, |
| "logps/chosen": -0.5528481602668762, |
| "logps/rejected": -1.7437477111816406, |
| "loss": 0.7039, |
| "nll_loss": 0.06110968068242073, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.16585442423820496, |
| "rewards/margins": 0.3572699725627899, |
| "rewards/rejected": -0.5231243371963501, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.10800605887647356, |
| "grad_norm": 4.9116291999816895, |
| "learning_rate": 9.593827160493828e-06, |
| "logits/chosen": 2.8747756481170654, |
| "logits/rejected": 2.797008514404297, |
| "logps/chosen": -0.7696909308433533, |
| "logps/rejected": -1.8576171398162842, |
| "loss": 0.7164, |
| "nll_loss": 0.08876083791255951, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.23090729117393494, |
| "rewards/margins": 0.3263779282569885, |
| "rewards/rejected": -0.5572851896286011, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.10888415691611969, |
| "grad_norm": 3.6960697174072266, |
| "learning_rate": 9.581481481481482e-06, |
| "logits/chosen": 3.121314287185669, |
| "logits/rejected": 3.1768624782562256, |
| "logps/chosen": -0.597256064414978, |
| "logps/rejected": -2.1361727714538574, |
| "loss": 0.6718, |
| "nll_loss": 0.06965653598308563, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.17917683720588684, |
| "rewards/margins": 0.4616750180721283, |
| "rewards/rejected": -0.6408518552780151, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.10976225495576582, |
| "grad_norm": 1.3148126602172852, |
| "learning_rate": 9.569135802469136e-06, |
| "logits/chosen": 2.9931716918945312, |
| "logits/rejected": 3.0196568965911865, |
| "logps/chosen": -0.8401254415512085, |
| "logps/rejected": -2.2920265197753906, |
| "loss": 0.7173, |
| "nll_loss": 0.08757736533880234, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.2520376443862915, |
| "rewards/margins": 0.4355703294277191, |
| "rewards/rejected": -0.6876079440116882, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.11064035299541194, |
| "grad_norm": 3.798100471496582, |
| "learning_rate": 9.556790123456791e-06, |
| "logits/chosen": 2.9449126720428467, |
| "logits/rejected": 2.9711549282073975, |
| "logps/chosen": -0.6954627633094788, |
| "logps/rejected": -1.5479028224945068, |
| "loss": 0.7229, |
| "nll_loss": 0.06693422794342041, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.20863886177539825, |
| "rewards/margins": 0.2557320296764374, |
| "rewards/rejected": -0.4643709063529968, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.11151845103505806, |
| "grad_norm": 2.8658065795898438, |
| "learning_rate": 9.544444444444445e-06, |
| "logits/chosen": 3.015810966491699, |
| "logits/rejected": 3.0757055282592773, |
| "logps/chosen": -1.2240221500396729, |
| "logps/rejected": -1.8749526739120483, |
| "loss": 0.8117, |
| "nll_loss": 0.10025894641876221, |
| "rewards/accuracies": 0.42500001192092896, |
| "rewards/chosen": -0.3672066628932953, |
| "rewards/margins": 0.1952790915966034, |
| "rewards/rejected": -0.5624858140945435, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.11239654907470419, |
| "grad_norm": 2.584207534790039, |
| "learning_rate": 9.5320987654321e-06, |
| "logits/chosen": 3.0405101776123047, |
| "logits/rejected": 3.0836472511291504, |
| "logps/chosen": -0.5894891619682312, |
| "logps/rejected": -1.7727954387664795, |
| "loss": 0.6706, |
| "nll_loss": 0.057862233370542526, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.17684674263000488, |
| "rewards/margins": 0.35499197244644165, |
| "rewards/rejected": -0.5318387150764465, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.11327464711435031, |
| "grad_norm": 0.8380700945854187, |
| "learning_rate": 9.519753086419754e-06, |
| "logits/chosen": 3.015899896621704, |
| "logits/rejected": 2.9938347339630127, |
| "logps/chosen": -0.48675793409347534, |
| "logps/rejected": -1.6840702295303345, |
| "loss": 0.6618, |
| "nll_loss": 0.06300728023052216, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.14602738618850708, |
| "rewards/margins": 0.35919371247291565, |
| "rewards/rejected": -0.5052211880683899, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.11415274515399644, |
| "grad_norm": 1.4230750799179077, |
| "learning_rate": 9.507407407407409e-06, |
| "logits/chosen": 3.2599899768829346, |
| "logits/rejected": 3.2479500770568848, |
| "logps/chosen": -0.5513351559638977, |
| "logps/rejected": -1.5504658222198486, |
| "loss": 0.7042, |
| "nll_loss": 0.05995137244462967, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.16540054976940155, |
| "rewards/margins": 0.299739271402359, |
| "rewards/rejected": -0.46513980627059937, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.11503084319364257, |
| "grad_norm": 1.0455182790756226, |
| "learning_rate": 9.495061728395063e-06, |
| "logits/chosen": 3.0312843322753906, |
| "logits/rejected": 3.068418025970459, |
| "logps/chosen": -0.5078593492507935, |
| "logps/rejected": -2.3526813983917236, |
| "loss": 0.6117, |
| "nll_loss": 0.04707217961549759, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.152357816696167, |
| "rewards/margins": 0.5534465909004211, |
| "rewards/rejected": -0.7058044672012329, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.1159089412332887, |
| "grad_norm": 3.6651833057403564, |
| "learning_rate": 9.482716049382716e-06, |
| "logits/chosen": 3.071345806121826, |
| "logits/rejected": 3.0848429203033447, |
| "logps/chosen": -0.6071802973747253, |
| "logps/rejected": -1.6902376413345337, |
| "loss": 0.6877, |
| "nll_loss": 0.06448554247617722, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.1821540892124176, |
| "rewards/margins": 0.324917197227478, |
| "rewards/rejected": -0.507071316242218, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.11678703927293482, |
| "grad_norm": 5.458474159240723, |
| "learning_rate": 9.47037037037037e-06, |
| "logits/chosen": 2.9955544471740723, |
| "logits/rejected": 2.9541006088256836, |
| "logps/chosen": -1.1582845449447632, |
| "logps/rejected": -2.687746047973633, |
| "loss": 0.8277, |
| "nll_loss": 0.14388300478458405, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.34748542308807373, |
| "rewards/margins": 0.45883846282958984, |
| "rewards/rejected": -0.8063238859176636, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.11766513731258095, |
| "grad_norm": 1.8496476411819458, |
| "learning_rate": 9.458024691358025e-06, |
| "logits/chosen": 2.8806967735290527, |
| "logits/rejected": 2.922480344772339, |
| "logps/chosen": -0.3559941351413727, |
| "logps/rejected": -2.068563938140869, |
| "loss": 0.5969, |
| "nll_loss": 0.03910910710692406, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.10679824650287628, |
| "rewards/margins": 0.5137708783149719, |
| "rewards/rejected": -0.6205691695213318, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.11854323535222708, |
| "grad_norm": 8.205083847045898, |
| "learning_rate": 9.44567901234568e-06, |
| "logits/chosen": 2.9301178455352783, |
| "logits/rejected": 2.972548246383667, |
| "logps/chosen": -0.6576313972473145, |
| "logps/rejected": -1.3083826303482056, |
| "loss": 0.7648, |
| "nll_loss": 0.0889785960316658, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.19728945195674896, |
| "rewards/margins": 0.19522538781166077, |
| "rewards/rejected": -0.39251479506492615, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.1194213333918732, |
| "grad_norm": 3.6549758911132812, |
| "learning_rate": 9.433333333333335e-06, |
| "logits/chosen": 3.0893311500549316, |
| "logits/rejected": 3.068948745727539, |
| "logps/chosen": -0.6783910989761353, |
| "logps/rejected": -2.4969258308410645, |
| "loss": 0.6481, |
| "nll_loss": 0.06912653148174286, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.203517347574234, |
| "rewards/margins": 0.545560359954834, |
| "rewards/rejected": -0.7490777969360352, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.12029943143151933, |
| "grad_norm": 1.5456334352493286, |
| "learning_rate": 9.42098765432099e-06, |
| "logits/chosen": 2.9339780807495117, |
| "logits/rejected": 2.9962122440338135, |
| "logps/chosen": -0.46310439705848694, |
| "logps/rejected": -2.436547040939331, |
| "loss": 0.6248, |
| "nll_loss": 0.06015176698565483, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.13893131911754608, |
| "rewards/margins": 0.5920329093933105, |
| "rewards/rejected": -0.730964183807373, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.12117752947116546, |
| "grad_norm": 2.95572829246521, |
| "learning_rate": 9.408641975308642e-06, |
| "logits/chosen": 2.7811484336853027, |
| "logits/rejected": 2.875540018081665, |
| "logps/chosen": -0.6040056347846985, |
| "logps/rejected": -2.7113006114959717, |
| "loss": 0.6342, |
| "nll_loss": 0.042366378009319305, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.18120168149471283, |
| "rewards/margins": 0.6321884393692017, |
| "rewards/rejected": -0.8133901357650757, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.12205562751081159, |
| "grad_norm": 5.596220970153809, |
| "learning_rate": 9.396296296296297e-06, |
| "logits/chosen": 2.9219090938568115, |
| "logits/rejected": 2.9703197479248047, |
| "logps/chosen": -1.1313722133636475, |
| "logps/rejected": -3.376211166381836, |
| "loss": 0.6689, |
| "nll_loss": 0.11231324821710587, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.3394116759300232, |
| "rewards/margins": 0.6734517216682434, |
| "rewards/rejected": -1.0128633975982666, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.12293372555045771, |
| "grad_norm": 4.154329776763916, |
| "learning_rate": 9.383950617283951e-06, |
| "logits/chosen": 2.875544548034668, |
| "logits/rejected": 2.909510374069214, |
| "logps/chosen": -0.7795349955558777, |
| "logps/rejected": -3.0624213218688965, |
| "loss": 0.6413, |
| "nll_loss": 0.09788934886455536, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.23386052250862122, |
| "rewards/margins": 0.6848658919334412, |
| "rewards/rejected": -0.9187263250350952, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.12381182359010383, |
| "grad_norm": 0.9190550446510315, |
| "learning_rate": 9.371604938271605e-06, |
| "logits/chosen": 2.800968885421753, |
| "logits/rejected": 2.7952873706817627, |
| "logps/chosen": -0.5842548608779907, |
| "logps/rejected": -2.0285487174987793, |
| "loss": 0.6672, |
| "nll_loss": 0.06469441950321198, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.17527645826339722, |
| "rewards/margins": 0.43328824639320374, |
| "rewards/rejected": -0.6085646748542786, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.12468992162974996, |
| "grad_norm": 0.4985824525356293, |
| "learning_rate": 9.35925925925926e-06, |
| "logits/chosen": 2.738670825958252, |
| "logits/rejected": 2.7031972408294678, |
| "logps/chosen": -0.714606761932373, |
| "logps/rejected": -1.1444988250732422, |
| "loss": 0.7813, |
| "nll_loss": 0.07031063735485077, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.21438205242156982, |
| "rewards/margins": 0.12896756827831268, |
| "rewards/rejected": -0.3433496356010437, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.1255680196693961, |
| "grad_norm": 3.7305266857147217, |
| "learning_rate": 9.346913580246914e-06, |
| "logits/chosen": 2.893129825592041, |
| "logits/rejected": 2.9149386882781982, |
| "logps/chosen": -0.46482163667678833, |
| "logps/rejected": -2.2275373935699463, |
| "loss": 0.6239, |
| "nll_loss": 0.051440030336380005, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.13944648206233978, |
| "rewards/margins": 0.5288147926330566, |
| "rewards/rejected": -0.6682612299919128, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.1264461177090422, |
| "grad_norm": 1.8036631345748901, |
| "learning_rate": 9.334567901234569e-06, |
| "logits/chosen": 2.547828197479248, |
| "logits/rejected": 2.5787394046783447, |
| "logps/chosen": -0.6464110612869263, |
| "logps/rejected": -2.2243905067443848, |
| "loss": 0.6899, |
| "nll_loss": 0.06981117278337479, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.19392332434654236, |
| "rewards/margins": 0.47339382767677307, |
| "rewards/rejected": -0.6673170924186707, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.12732421574868835, |
| "grad_norm": 1.5003271102905273, |
| "learning_rate": 9.322222222222223e-06, |
| "logits/chosen": 2.7942347526550293, |
| "logits/rejected": 2.790160655975342, |
| "logps/chosen": -0.45981842279434204, |
| "logps/rejected": -1.9567676782608032, |
| "loss": 0.6536, |
| "nll_loss": 0.0419035442173481, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.1379455327987671, |
| "rewards/margins": 0.4490847587585449, |
| "rewards/rejected": -0.587030291557312, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.12820231378833447, |
| "grad_norm": 3.1813344955444336, |
| "learning_rate": 9.309876543209878e-06, |
| "logits/chosen": 2.6398301124572754, |
| "logits/rejected": 2.6550662517547607, |
| "logps/chosen": -0.6792389154434204, |
| "logps/rejected": -2.9893813133239746, |
| "loss": 0.6317, |
| "nll_loss": 0.08011049032211304, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.2037716805934906, |
| "rewards/margins": 0.6930428147315979, |
| "rewards/rejected": -0.8968144655227661, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.1290804118279806, |
| "grad_norm": 6.025292873382568, |
| "learning_rate": 9.297530864197532e-06, |
| "logits/chosen": 2.5487821102142334, |
| "logits/rejected": 2.6483190059661865, |
| "logps/chosen": -0.3606962561607361, |
| "logps/rejected": -2.5816264152526855, |
| "loss": 0.6086, |
| "nll_loss": 0.04057370498776436, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.10820887982845306, |
| "rewards/margins": 0.6662790179252625, |
| "rewards/rejected": -0.7744879126548767, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.12995850986762672, |
| "grad_norm": 31.56429100036621, |
| "learning_rate": 9.285185185185186e-06, |
| "logits/chosen": 2.4623022079467773, |
| "logits/rejected": 2.409700393676758, |
| "logps/chosen": -0.9133744239807129, |
| "logps/rejected": -3.7820403575897217, |
| "loss": 0.8332, |
| "nll_loss": 0.23980839550495148, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.27401235699653625, |
| "rewards/margins": 0.8605998158454895, |
| "rewards/rejected": -1.1346122026443481, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.13083660790727283, |
| "grad_norm": 0.035230621695518494, |
| "learning_rate": 9.27283950617284e-06, |
| "logits/chosen": 2.52907133102417, |
| "logits/rejected": 2.530890464782715, |
| "logps/chosen": -1.2194797992706299, |
| "logps/rejected": -2.8537631034851074, |
| "loss": 0.7781, |
| "nll_loss": 0.14254291355609894, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.36584392189979553, |
| "rewards/margins": 0.49028509855270386, |
| "rewards/rejected": -0.856128990650177, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.13171470594691898, |
| "grad_norm": 6.077812671661377, |
| "learning_rate": 9.260493827160495e-06, |
| "logits/chosen": 2.5622482299804688, |
| "logits/rejected": 2.607653856277466, |
| "logps/chosen": -0.8888137936592102, |
| "logps/rejected": -2.8897366523742676, |
| "loss": 0.6866, |
| "nll_loss": 0.07926015555858612, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.2666441798210144, |
| "rewards/margins": 0.6002769470214844, |
| "rewards/rejected": -0.8669211268424988, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1325928039865651, |
| "grad_norm": 3.651186466217041, |
| "learning_rate": 9.24814814814815e-06, |
| "logits/chosen": 2.693009853363037, |
| "logits/rejected": 2.6718087196350098, |
| "logps/chosen": -0.8477522134780884, |
| "logps/rejected": -1.7480132579803467, |
| "loss": 0.7543, |
| "nll_loss": 0.09234277904033661, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.25432562828063965, |
| "rewards/margins": 0.2700783610343933, |
| "rewards/rejected": -0.5244040489196777, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.13347090202621123, |
| "grad_norm": 2.8306994438171387, |
| "learning_rate": 9.235802469135802e-06, |
| "logits/chosen": 2.8524880409240723, |
| "logits/rejected": 2.8818418979644775, |
| "logps/chosen": -0.49177321791648865, |
| "logps/rejected": -1.9612071514129639, |
| "loss": 0.6686, |
| "nll_loss": 0.08659791201353073, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.14753195643424988, |
| "rewards/margins": 0.4408302307128906, |
| "rewards/rejected": -0.5883622169494629, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.13434900006585734, |
| "grad_norm": 3.763047456741333, |
| "learning_rate": 9.223456790123457e-06, |
| "logits/chosen": 2.698072671890259, |
| "logits/rejected": 2.754232883453369, |
| "logps/chosen": -0.798735499382019, |
| "logps/rejected": -2.2809195518493652, |
| "loss": 0.6913, |
| "nll_loss": 0.07482419162988663, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.2396206557750702, |
| "rewards/margins": 0.44465526938438416, |
| "rewards/rejected": -0.6842759251594543, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.13522709810550348, |
| "grad_norm": 3.579263925552368, |
| "learning_rate": 9.211111111111111e-06, |
| "logits/chosen": 3.0055601596832275, |
| "logits/rejected": 3.015162706375122, |
| "logps/chosen": -0.6990305185317993, |
| "logps/rejected": -1.879294991493225, |
| "loss": 0.6897, |
| "nll_loss": 0.06048674136400223, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.20970916748046875, |
| "rewards/margins": 0.35407939553260803, |
| "rewards/rejected": -0.5637885332107544, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.1361051961451496, |
| "grad_norm": 1.4635862112045288, |
| "learning_rate": 9.198765432098766e-06, |
| "logits/chosen": 2.665912628173828, |
| "logits/rejected": 2.7371814250946045, |
| "logps/chosen": -0.644806981086731, |
| "logps/rejected": -2.0770316123962402, |
| "loss": 0.6949, |
| "nll_loss": 0.07454844564199448, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": -0.19344215095043182, |
| "rewards/margins": 0.4296673834323883, |
| "rewards/rejected": -0.6231095194816589, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.13698329418479574, |
| "grad_norm": 3.593688726425171, |
| "learning_rate": 9.18641975308642e-06, |
| "logits/chosen": 3.0721404552459717, |
| "logits/rejected": 3.028421401977539, |
| "logps/chosen": -0.8886274099349976, |
| "logps/rejected": -2.1320443153381348, |
| "loss": 0.7047, |
| "nll_loss": 0.07659469544887543, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.2665882408618927, |
| "rewards/margins": 0.3730250298976898, |
| "rewards/rejected": -0.6396132707595825, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.13786139222444185, |
| "grad_norm": 6.7611470222473145, |
| "learning_rate": 9.174074074074074e-06, |
| "logits/chosen": 2.952538013458252, |
| "logits/rejected": 2.9351909160614014, |
| "logps/chosen": -0.5501828193664551, |
| "logps/rejected": -2.4539520740509033, |
| "loss": 0.6445, |
| "nll_loss": 0.07492227852344513, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.16505484282970428, |
| "rewards/margins": 0.5711307525634766, |
| "rewards/rejected": -0.7361854910850525, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.138739490264088, |
| "grad_norm": 0.00917895883321762, |
| "learning_rate": 9.161728395061729e-06, |
| "logits/chosen": 2.942192792892456, |
| "logits/rejected": 2.9979898929595947, |
| "logps/chosen": -0.4612821638584137, |
| "logps/rejected": -1.4370297193527222, |
| "loss": 0.6585, |
| "nll_loss": 0.04147753119468689, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.13838467001914978, |
| "rewards/margins": 0.2927243113517761, |
| "rewards/rejected": -0.43110889196395874, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.1396175883037341, |
| "grad_norm": 1.7900971174240112, |
| "learning_rate": 9.149382716049383e-06, |
| "logits/chosen": 2.7242748737335205, |
| "logits/rejected": 2.7570395469665527, |
| "logps/chosen": -0.5272113084793091, |
| "logps/rejected": -1.8239033222198486, |
| "loss": 0.6752, |
| "nll_loss": 0.07194850593805313, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.1581634134054184, |
| "rewards/margins": 0.3890075981616974, |
| "rewards/rejected": -0.5471709966659546, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.14049568634338025, |
| "grad_norm": 7.255580902099609, |
| "learning_rate": 9.137037037037038e-06, |
| "logits/chosen": 2.852332592010498, |
| "logits/rejected": 2.8654465675354004, |
| "logps/chosen": -0.8652445077896118, |
| "logps/rejected": -2.7340779304504395, |
| "loss": 0.7026, |
| "nll_loss": 0.08957532793283463, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.25957340002059937, |
| "rewards/margins": 0.5606500506401062, |
| "rewards/rejected": -0.8202234506607056, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.14137378438302636, |
| "grad_norm": 0.27785807847976685, |
| "learning_rate": 9.124691358024692e-06, |
| "logits/chosen": 2.8517799377441406, |
| "logits/rejected": 2.871009111404419, |
| "logps/chosen": -0.45925140380859375, |
| "logps/rejected": -1.38749098777771, |
| "loss": 0.6784, |
| "nll_loss": 0.046599697321653366, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.13777543604373932, |
| "rewards/margins": 0.2784718871116638, |
| "rewards/rejected": -0.41624727845191956, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.1422518824226725, |
| "grad_norm": 2.3490424156188965, |
| "learning_rate": 9.112345679012347e-06, |
| "logits/chosen": 3.001574754714966, |
| "logits/rejected": 2.9703142642974854, |
| "logps/chosen": -0.4932584762573242, |
| "logps/rejected": -2.1252918243408203, |
| "loss": 0.6813, |
| "nll_loss": 0.08429791033267975, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.1479775607585907, |
| "rewards/margins": 0.48961010575294495, |
| "rewards/rejected": -0.6375876665115356, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.14312998046231862, |
| "grad_norm": 3.299020767211914, |
| "learning_rate": 9.100000000000001e-06, |
| "logits/chosen": 2.8275907039642334, |
| "logits/rejected": 2.9383082389831543, |
| "logps/chosen": -0.6793197989463806, |
| "logps/rejected": -2.526850700378418, |
| "loss": 0.6513, |
| "nll_loss": 0.05579754710197449, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.20379595458507538, |
| "rewards/margins": 0.5542593002319336, |
| "rewards/rejected": -0.7580552101135254, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.14400807850196473, |
| "grad_norm": 0.914357602596283, |
| "learning_rate": 9.087654320987655e-06, |
| "logits/chosen": 2.987334966659546, |
| "logits/rejected": 2.9897654056549072, |
| "logps/chosen": -0.665000319480896, |
| "logps/rejected": -2.4969944953918457, |
| "loss": 0.6926, |
| "nll_loss": 0.07977604120969772, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.19950011372566223, |
| "rewards/margins": 0.5495983362197876, |
| "rewards/rejected": -0.7490984797477722, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.14488617654161087, |
| "grad_norm": 2.353787422180176, |
| "learning_rate": 9.075308641975308e-06, |
| "logits/chosen": 2.7523000240325928, |
| "logits/rejected": 2.805290937423706, |
| "logps/chosen": -0.34468549489974976, |
| "logps/rejected": -2.1994361877441406, |
| "loss": 0.6206, |
| "nll_loss": 0.04046661779284477, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.1034056693315506, |
| "rewards/margins": 0.5564252138137817, |
| "rewards/rejected": -0.6598309278488159, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.145764274581257, |
| "grad_norm": 9.683286666870117, |
| "learning_rate": 9.062962962962964e-06, |
| "logits/chosen": 2.8048713207244873, |
| "logits/rejected": 2.8911221027374268, |
| "logps/chosen": -0.8389989137649536, |
| "logps/rejected": -1.6681379079818726, |
| "loss": 0.7894, |
| "nll_loss": 0.09216944873332977, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.25169968605041504, |
| "rewards/margins": 0.24874171614646912, |
| "rewards/rejected": -0.5004413723945618, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.14664237262090313, |
| "grad_norm": 9.369816780090332, |
| "learning_rate": 9.050617283950619e-06, |
| "logits/chosen": 2.8822600841522217, |
| "logits/rejected": 2.904628038406372, |
| "logps/chosen": -0.8616389036178589, |
| "logps/rejected": -2.4774065017700195, |
| "loss": 0.6733, |
| "nll_loss": 0.052507419139146805, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.2584916949272156, |
| "rewards/margins": 0.48473024368286133, |
| "rewards/rejected": -0.7432219386100769, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.14752047066054924, |
| "grad_norm": 1.7723337411880493, |
| "learning_rate": 9.038271604938273e-06, |
| "logits/chosen": 3.0452260971069336, |
| "logits/rejected": 3.092968225479126, |
| "logps/chosen": -0.5382004380226135, |
| "logps/rejected": -1.853811264038086, |
| "loss": 0.6863, |
| "nll_loss": 0.07374037802219391, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.16146014630794525, |
| "rewards/margins": 0.39468324184417725, |
| "rewards/rejected": -0.5561434030532837, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.14839856870019538, |
| "grad_norm": 4.466056823730469, |
| "learning_rate": 9.025925925925927e-06, |
| "logits/chosen": 2.8592844009399414, |
| "logits/rejected": 2.896420955657959, |
| "logps/chosen": -0.5004099011421204, |
| "logps/rejected": -1.6472933292388916, |
| "loss": 0.6771, |
| "nll_loss": 0.05312333256006241, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.1501229852437973, |
| "rewards/margins": 0.34406501054763794, |
| "rewards/rejected": -0.49418801069259644, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.1492766667398415, |
| "grad_norm": 2.437678098678589, |
| "learning_rate": 9.013580246913582e-06, |
| "logits/chosen": 2.8448596000671387, |
| "logits/rejected": 2.8915913105010986, |
| "logps/chosen": -0.6149319410324097, |
| "logps/rejected": -1.89218008518219, |
| "loss": 0.673, |
| "nll_loss": 0.05624104663729668, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.18447960913181305, |
| "rewards/margins": 0.3831743597984314, |
| "rewards/rejected": -0.567654013633728, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.15015476477948764, |
| "grad_norm": 3.493530750274658, |
| "learning_rate": 9.001234567901236e-06, |
| "logits/chosen": 2.781919479370117, |
| "logits/rejected": 2.7699952125549316, |
| "logps/chosen": -0.6766859889030457, |
| "logps/rejected": -2.136669635772705, |
| "loss": 0.6503, |
| "nll_loss": 0.06224127486348152, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.2030058205127716, |
| "rewards/margins": 0.43799519538879395, |
| "rewards/rejected": -0.6410009264945984, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.15103286281913375, |
| "grad_norm": 3.9590651988983154, |
| "learning_rate": 8.988888888888889e-06, |
| "logits/chosen": 2.562682628631592, |
| "logits/rejected": 2.5419199466705322, |
| "logps/chosen": -0.965559184551239, |
| "logps/rejected": -2.73313570022583, |
| "loss": 0.7286, |
| "nll_loss": 0.10021784156560898, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.2896677553653717, |
| "rewards/margins": 0.5302730798721313, |
| "rewards/rejected": -0.8199408650398254, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.1519109608587799, |
| "grad_norm": 6.649216175079346, |
| "learning_rate": 8.976543209876543e-06, |
| "logits/chosen": 2.754970073699951, |
| "logits/rejected": 2.800556182861328, |
| "logps/chosen": -0.7695094347000122, |
| "logps/rejected": -2.4826478958129883, |
| "loss": 0.7055, |
| "nll_loss": 0.05943988636136055, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.23085281252861023, |
| "rewards/margins": 0.5139415264129639, |
| "rewards/rejected": -0.7447944283485413, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.152789058898426, |
| "grad_norm": 3.0816867351531982, |
| "learning_rate": 8.964197530864198e-06, |
| "logits/chosen": 2.7005701065063477, |
| "logits/rejected": 2.8037772178649902, |
| "logps/chosen": -0.7151850461959839, |
| "logps/rejected": -2.9252617359161377, |
| "loss": 0.6407, |
| "nll_loss": 0.06549613177776337, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.2145555019378662, |
| "rewards/margins": 0.6630231142044067, |
| "rewards/rejected": -0.8775785565376282, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.15366715693807215, |
| "grad_norm": 3.4558446407318115, |
| "learning_rate": 8.951851851851852e-06, |
| "logits/chosen": 2.783221483230591, |
| "logits/rejected": 2.7478537559509277, |
| "logps/chosen": -0.3045424818992615, |
| "logps/rejected": -1.9958875179290771, |
| "loss": 0.5977, |
| "nll_loss": 0.030114714056253433, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.09136275202035904, |
| "rewards/margins": 0.507403552532196, |
| "rewards/rejected": -0.5987662672996521, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.15454525497771826, |
| "grad_norm": 2.812927722930908, |
| "learning_rate": 8.939506172839507e-06, |
| "logits/chosen": 2.567533016204834, |
| "logits/rejected": 2.5942509174346924, |
| "logps/chosen": -0.7922319173812866, |
| "logps/rejected": -1.9200432300567627, |
| "loss": 0.7427, |
| "nll_loss": 0.07532784342765808, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.23766958713531494, |
| "rewards/margins": 0.33834341168403625, |
| "rewards/rejected": -0.5760129690170288, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.1554233530173644, |
| "grad_norm": 4.2586750984191895, |
| "learning_rate": 8.927160493827161e-06, |
| "logits/chosen": 2.8497743606567383, |
| "logits/rejected": 2.857257604598999, |
| "logps/chosen": -1.0099961757659912, |
| "logps/rejected": -3.4478023052215576, |
| "loss": 0.6624, |
| "nll_loss": 0.1071515902876854, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.302998811006546, |
| "rewards/margins": 0.7313419580459595, |
| "rewards/rejected": -1.034340739250183, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.15630145105701052, |
| "grad_norm": 1.7801121473312378, |
| "learning_rate": 8.914814814814816e-06, |
| "logits/chosen": 2.7116096019744873, |
| "logits/rejected": 2.7383835315704346, |
| "logps/chosen": -0.6959985494613647, |
| "logps/rejected": -2.842719554901123, |
| "loss": 0.626, |
| "nll_loss": 0.07402163743972778, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.2087995707988739, |
| "rewards/margins": 0.6440162658691406, |
| "rewards/rejected": -0.8528158068656921, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.15717954909665663, |
| "grad_norm": 2.949233055114746, |
| "learning_rate": 8.90246913580247e-06, |
| "logits/chosen": 2.5691332817077637, |
| "logits/rejected": 2.6201071739196777, |
| "logps/chosen": -0.7052744626998901, |
| "logps/rejected": -2.180938720703125, |
| "loss": 0.6607, |
| "nll_loss": 0.058830149471759796, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.21158234775066376, |
| "rewards/margins": 0.4426993429660797, |
| "rewards/rejected": -0.6542816758155823, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.15805764713630277, |
| "grad_norm": 0.5553939938545227, |
| "learning_rate": 8.890123456790124e-06, |
| "logits/chosen": 2.677717685699463, |
| "logits/rejected": 2.782273292541504, |
| "logps/chosen": -0.4674352705478668, |
| "logps/rejected": -3.086191177368164, |
| "loss": 0.5701, |
| "nll_loss": 0.04577519744634628, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.14023058116436005, |
| "rewards/margins": 0.7856268286705017, |
| "rewards/rejected": -0.925857424736023, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.15893574517594888, |
| "grad_norm": 2.8835935592651367, |
| "learning_rate": 8.877777777777779e-06, |
| "logits/chosen": 2.745272636413574, |
| "logits/rejected": 2.806513547897339, |
| "logps/chosen": -0.5373865962028503, |
| "logps/rejected": -3.408268451690674, |
| "loss": 0.6131, |
| "nll_loss": 0.053614210337400436, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.16121599078178406, |
| "rewards/margins": 0.8612645864486694, |
| "rewards/rejected": -1.0224807262420654, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.15981384321559503, |
| "grad_norm": 6.393036365509033, |
| "learning_rate": 8.865432098765433e-06, |
| "logits/chosen": 2.6081528663635254, |
| "logits/rejected": 2.663269281387329, |
| "logps/chosen": -0.6596813201904297, |
| "logps/rejected": -3.0106234550476074, |
| "loss": 0.6523, |
| "nll_loss": 0.07575313746929169, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.19790442287921906, |
| "rewards/margins": 0.7052826881408691, |
| "rewards/rejected": -0.903187096118927, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.16069194125524114, |
| "grad_norm": 8.32457160949707, |
| "learning_rate": 8.853086419753088e-06, |
| "logits/chosen": 2.1919102668762207, |
| "logits/rejected": 2.2420222759246826, |
| "logps/chosen": -0.6085657477378845, |
| "logps/rejected": -3.387340545654297, |
| "loss": 0.6396, |
| "nll_loss": 0.05589609593153, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.1825697273015976, |
| "rewards/margins": 0.8336323499679565, |
| "rewards/rejected": -1.0162022113800049, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.16157003929488728, |
| "grad_norm": 8.0199556350708, |
| "learning_rate": 8.840740740740742e-06, |
| "logits/chosen": 2.2459845542907715, |
| "logits/rejected": 2.2368826866149902, |
| "logps/chosen": -0.904313862323761, |
| "logps/rejected": -3.4034416675567627, |
| "loss": 0.7623, |
| "nll_loss": 0.13848955929279327, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.27129411697387695, |
| "rewards/margins": 0.7497383952140808, |
| "rewards/rejected": -1.0210325717926025, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.1624481373345334, |
| "grad_norm": 3.155104398727417, |
| "learning_rate": 8.828395061728395e-06, |
| "logits/chosen": 2.594691514968872, |
| "logits/rejected": 2.571620464324951, |
| "logps/chosen": -1.037233591079712, |
| "logps/rejected": -2.723869800567627, |
| "loss": 0.7723, |
| "nll_loss": 0.12755393981933594, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.3111700415611267, |
| "rewards/margins": 0.5059908628463745, |
| "rewards/rejected": -0.817160964012146, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.16332623537417953, |
| "grad_norm": 3.548657178878784, |
| "learning_rate": 8.81604938271605e-06, |
| "logits/chosen": 2.7853002548217773, |
| "logits/rejected": 2.796757221221924, |
| "logps/chosen": -0.5054196119308472, |
| "logps/rejected": -1.9281543493270874, |
| "loss": 0.6821, |
| "nll_loss": 0.06374648213386536, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.15162590146064758, |
| "rewards/margins": 0.42682045698165894, |
| "rewards/rejected": -0.5784463286399841, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.16420433341382565, |
| "grad_norm": 5.429587364196777, |
| "learning_rate": 8.803703703703704e-06, |
| "logits/chosen": 2.6975064277648926, |
| "logits/rejected": 2.7702746391296387, |
| "logps/chosen": -0.7951546907424927, |
| "logps/rejected": -2.666987895965576, |
| "loss": 0.6656, |
| "nll_loss": 0.06336641311645508, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.2385464459657669, |
| "rewards/margins": 0.5615500211715698, |
| "rewards/rejected": -0.8000965118408203, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.1650824314534718, |
| "grad_norm": 1.150895357131958, |
| "learning_rate": 8.791358024691358e-06, |
| "logits/chosen": 2.687932252883911, |
| "logits/rejected": 2.754683017730713, |
| "logps/chosen": -0.7847083806991577, |
| "logps/rejected": -2.204124927520752, |
| "loss": 0.7186, |
| "nll_loss": 0.08406446129083633, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.23541252315044403, |
| "rewards/margins": 0.42582497000694275, |
| "rewards/rejected": -0.6612375378608704, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.1659605294931179, |
| "grad_norm": 2.4268455505371094, |
| "learning_rate": 8.779012345679012e-06, |
| "logits/chosen": 2.5794925689697266, |
| "logits/rejected": 2.6048686504364014, |
| "logps/chosen": -0.6307061314582825, |
| "logps/rejected": -2.8818671703338623, |
| "loss": 0.619, |
| "nll_loss": 0.07149704545736313, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.1892118602991104, |
| "rewards/margins": 0.6753484010696411, |
| "rewards/rejected": -0.8645601272583008, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.16683862753276404, |
| "grad_norm": 5.715259075164795, |
| "learning_rate": 8.766666666666669e-06, |
| "logits/chosen": 2.707252025604248, |
| "logits/rejected": 2.6988537311553955, |
| "logps/chosen": -0.6423169374465942, |
| "logps/rejected": -1.875507116317749, |
| "loss": 0.6782, |
| "nll_loss": 0.04863595962524414, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.19269508123397827, |
| "rewards/margins": 0.3699570894241333, |
| "rewards/rejected": -0.5626521706581116, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.16771672557241016, |
| "grad_norm": 1.6416656970977783, |
| "learning_rate": 8.754320987654323e-06, |
| "logits/chosen": 2.792642593383789, |
| "logits/rejected": 2.8566908836364746, |
| "logps/chosen": -0.6888980269432068, |
| "logps/rejected": -2.5319087505340576, |
| "loss": 0.653, |
| "nll_loss": 0.06277020275592804, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.206669420003891, |
| "rewards/margins": 0.5529031753540039, |
| "rewards/rejected": -0.7595726251602173, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.1685948236120563, |
| "grad_norm": 1.5920031070709229, |
| "learning_rate": 8.741975308641976e-06, |
| "logits/chosen": 2.6872434616088867, |
| "logits/rejected": 2.6933059692382812, |
| "logps/chosen": -0.5184003710746765, |
| "logps/rejected": -1.9327194690704346, |
| "loss": 0.6515, |
| "nll_loss": 0.06715475022792816, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.15552011132240295, |
| "rewards/margins": 0.42429572343826294, |
| "rewards/rejected": -0.5798158049583435, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.1694729216517024, |
| "grad_norm": 4.420103073120117, |
| "learning_rate": 8.72962962962963e-06, |
| "logits/chosen": 2.835470676422119, |
| "logits/rejected": 2.8618292808532715, |
| "logps/chosen": -0.7457289695739746, |
| "logps/rejected": -2.064795970916748, |
| "loss": 0.7263, |
| "nll_loss": 0.10260520875453949, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.22371868789196014, |
| "rewards/margins": 0.39572006464004517, |
| "rewards/rejected": -0.6194387674331665, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.17035101969134853, |
| "grad_norm": 2.6691431999206543, |
| "learning_rate": 8.717283950617285e-06, |
| "logits/chosen": 2.6595866680145264, |
| "logits/rejected": 2.683384656906128, |
| "logps/chosen": -0.5285651087760925, |
| "logps/rejected": -1.9386451244354248, |
| "loss": 0.6966, |
| "nll_loss": 0.07593102753162384, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.1585695445537567, |
| "rewards/margins": 0.4230240285396576, |
| "rewards/rejected": -0.5815936326980591, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.17122911773099467, |
| "grad_norm": 1.895266056060791, |
| "learning_rate": 8.704938271604939e-06, |
| "logits/chosen": 2.801657199859619, |
| "logits/rejected": 2.762845516204834, |
| "logps/chosen": -0.6032005548477173, |
| "logps/rejected": -1.6262376308441162, |
| "loss": 0.6888, |
| "nll_loss": 0.060188956558704376, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.18096016347408295, |
| "rewards/margins": 0.30691108107566833, |
| "rewards/rejected": -0.4878712594509125, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.17210721577064078, |
| "grad_norm": 1.7787566184997559, |
| "learning_rate": 8.692592592592593e-06, |
| "logits/chosen": 2.8991875648498535, |
| "logits/rejected": 2.936009645462036, |
| "logps/chosen": -0.5252435803413391, |
| "logps/rejected": -1.9678659439086914, |
| "loss": 0.6254, |
| "nll_loss": 0.047948211431503296, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.15757307410240173, |
| "rewards/margins": 0.432786762714386, |
| "rewards/rejected": -0.5903598070144653, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.17298531381028692, |
| "grad_norm": 1.326920747756958, |
| "learning_rate": 8.680246913580248e-06, |
| "logits/chosen": 2.5659279823303223, |
| "logits/rejected": 2.590271472930908, |
| "logps/chosen": -0.5297742486000061, |
| "logps/rejected": -2.028113842010498, |
| "loss": 0.6545, |
| "nll_loss": 0.05374212935566902, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.15893225371837616, |
| "rewards/margins": 0.4495018422603607, |
| "rewards/rejected": -0.6084341406822205, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.17386341184993304, |
| "grad_norm": 4.611727714538574, |
| "learning_rate": 8.667901234567902e-06, |
| "logits/chosen": 2.6264684200286865, |
| "logits/rejected": 2.632913589477539, |
| "logps/chosen": -0.693698525428772, |
| "logps/rejected": -3.1472320556640625, |
| "loss": 0.6069, |
| "nll_loss": 0.05521649122238159, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.20810957252979279, |
| "rewards/margins": 0.7360601425170898, |
| "rewards/rejected": -0.9441697001457214, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.17474150988957918, |
| "grad_norm": 2.2870845794677734, |
| "learning_rate": 8.655555555555557e-06, |
| "logits/chosen": 2.3779568672180176, |
| "logits/rejected": 2.4282214641571045, |
| "logps/chosen": -0.24996769428253174, |
| "logps/rejected": -2.317931890487671, |
| "loss": 0.5852, |
| "nll_loss": 0.045966412872076035, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.07499031722545624, |
| "rewards/margins": 0.6203892230987549, |
| "rewards/rejected": -0.6953796148300171, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.1756196079292253, |
| "grad_norm": 1.1437376737594604, |
| "learning_rate": 8.643209876543211e-06, |
| "logits/chosen": 2.380004644393921, |
| "logits/rejected": 2.3685264587402344, |
| "logps/chosen": -0.5816354751586914, |
| "logps/rejected": -2.3618433475494385, |
| "loss": 0.6945, |
| "nll_loss": 0.07123078405857086, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.17449061572551727, |
| "rewards/margins": 0.5340624451637268, |
| "rewards/rejected": -0.70855313539505, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.17649770596887143, |
| "grad_norm": 9.37255573272705, |
| "learning_rate": 8.630864197530865e-06, |
| "logits/chosen": 2.211916446685791, |
| "logits/rejected": 2.220418930053711, |
| "logps/chosen": -0.5349146723747253, |
| "logps/rejected": -2.8715481758117676, |
| "loss": 0.6366, |
| "nll_loss": 0.06667280942201614, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.16047440469264984, |
| "rewards/margins": 0.7009900808334351, |
| "rewards/rejected": -0.8614645004272461, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.17737580400851755, |
| "grad_norm": 2.2170488834381104, |
| "learning_rate": 8.61851851851852e-06, |
| "logits/chosen": 1.94232177734375, |
| "logits/rejected": 1.9853718280792236, |
| "logps/chosen": -0.2896527945995331, |
| "logps/rejected": -3.063877820968628, |
| "loss": 0.6189, |
| "nll_loss": 0.046251922845840454, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.08689583837985992, |
| "rewards/margins": 0.8322674632072449, |
| "rewards/rejected": -0.9191633462905884, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.1782539020481637, |
| "grad_norm": 8.343827247619629, |
| "learning_rate": 8.606172839506174e-06, |
| "logits/chosen": 1.9204469919204712, |
| "logits/rejected": 1.948312759399414, |
| "logps/chosen": -1.1184568405151367, |
| "logps/rejected": -3.0377535820007324, |
| "loss": 0.8225, |
| "nll_loss": 0.12297489494085312, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.3355370759963989, |
| "rewards/margins": 0.5757889747619629, |
| "rewards/rejected": -0.9113261103630066, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.1791320000878098, |
| "grad_norm": 13.199078559875488, |
| "learning_rate": 8.593827160493829e-06, |
| "logits/chosen": 2.0205349922180176, |
| "logits/rejected": 2.0275635719299316, |
| "logps/chosen": -1.428043007850647, |
| "logps/rejected": -2.872222661972046, |
| "loss": 0.9056, |
| "nll_loss": 0.1807326227426529, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.4284129738807678, |
| "rewards/margins": 0.4332540035247803, |
| "rewards/rejected": -0.8616668581962585, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.18001009812745594, |
| "grad_norm": 2.260270357131958, |
| "learning_rate": 8.581481481481481e-06, |
| "logits/chosen": 2.2045745849609375, |
| "logits/rejected": 2.187514543533325, |
| "logps/chosen": -0.4845626950263977, |
| "logps/rejected": -2.2464981079101562, |
| "loss": 0.6956, |
| "nll_loss": 0.06010964512825012, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.1453687846660614, |
| "rewards/margins": 0.5285806059837341, |
| "rewards/rejected": -0.6739493608474731, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.18088819616710206, |
| "grad_norm": 2.032146453857422, |
| "learning_rate": 8.569135802469136e-06, |
| "logits/chosen": 2.2264397144317627, |
| "logits/rejected": 2.265310764312744, |
| "logps/chosen": -1.0286977291107178, |
| "logps/rejected": -3.1123714447021484, |
| "loss": 0.7175, |
| "nll_loss": 0.11090108007192612, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.3086093068122864, |
| "rewards/margins": 0.6251022815704346, |
| "rewards/rejected": -0.9337115287780762, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.1817662942067482, |
| "grad_norm": 0.9217659831047058, |
| "learning_rate": 8.55679012345679e-06, |
| "logits/chosen": 2.2858176231384277, |
| "logits/rejected": 2.315831422805786, |
| "logps/chosen": -0.6487756967544556, |
| "logps/rejected": -2.1730990409851074, |
| "loss": 0.6862, |
| "nll_loss": 0.07243213802576065, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.19463272392749786, |
| "rewards/margins": 0.4572969377040863, |
| "rewards/rejected": -0.6519297361373901, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.1826443922463943, |
| "grad_norm": 4.933953285217285, |
| "learning_rate": 8.544444444444445e-06, |
| "logits/chosen": 2.4036874771118164, |
| "logits/rejected": 2.3770546913146973, |
| "logps/chosen": -0.9166983366012573, |
| "logps/rejected": -2.5144124031066895, |
| "loss": 0.7355, |
| "nll_loss": 0.12400822341442108, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.27500951290130615, |
| "rewards/margins": 0.47931423783302307, |
| "rewards/rejected": -0.7543236613273621, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.18352249028604042, |
| "grad_norm": 5.920201778411865, |
| "learning_rate": 8.532098765432099e-06, |
| "logits/chosen": 2.504974603652954, |
| "logits/rejected": 2.541961193084717, |
| "logps/chosen": -0.8963934779167175, |
| "logps/rejected": -2.740062713623047, |
| "loss": 0.7162, |
| "nll_loss": 0.1004381999373436, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.2689180374145508, |
| "rewards/margins": 0.5531007051467896, |
| "rewards/rejected": -0.8220188021659851, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.18440058832568657, |
| "grad_norm": 0.10835571587085724, |
| "learning_rate": 8.519753086419754e-06, |
| "logits/chosen": 2.54685640335083, |
| "logits/rejected": 2.627911329269409, |
| "logps/chosen": -0.33035722374916077, |
| "logps/rejected": -2.806840419769287, |
| "loss": 0.561, |
| "nll_loss": 0.03572739288210869, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.09910716861486435, |
| "rewards/margins": 0.7429450154304504, |
| "rewards/rejected": -0.8420522809028625, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.18527868636533268, |
| "grad_norm": 5.237948417663574, |
| "learning_rate": 8.507407407407408e-06, |
| "logits/chosen": 2.4657981395721436, |
| "logits/rejected": 2.541999101638794, |
| "logps/chosen": -0.5284551382064819, |
| "logps/rejected": -2.2460741996765137, |
| "loss": 0.6753, |
| "nll_loss": 0.055809833109378815, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.15853655338287354, |
| "rewards/margins": 0.515285849571228, |
| "rewards/rejected": -0.6738223433494568, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.18615678440497882, |
| "grad_norm": 6.183863162994385, |
| "learning_rate": 8.495061728395062e-06, |
| "logits/chosen": 2.370246171951294, |
| "logits/rejected": 2.388896942138672, |
| "logps/chosen": -0.5437101125717163, |
| "logps/rejected": -2.8997700214385986, |
| "loss": 0.6217, |
| "nll_loss": 0.07455585151910782, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.1631130427122116, |
| "rewards/margins": 0.7068179249763489, |
| "rewards/rejected": -0.8699310421943665, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.18703488244462493, |
| "grad_norm": 3.2471072673797607, |
| "learning_rate": 8.482716049382717e-06, |
| "logits/chosen": 2.301064968109131, |
| "logits/rejected": 2.3306994438171387, |
| "logps/chosen": -1.0358989238739014, |
| "logps/rejected": -3.3612143993377686, |
| "loss": 0.6543, |
| "nll_loss": 0.08921568840742111, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.3107697069644928, |
| "rewards/margins": 0.6975947618484497, |
| "rewards/rejected": -1.0083644390106201, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.18791298048427107, |
| "grad_norm": 2.2561800479888916, |
| "learning_rate": 8.470370370370371e-06, |
| "logits/chosen": 2.35896635055542, |
| "logits/rejected": 2.424318790435791, |
| "logps/chosen": -0.42497625946998596, |
| "logps/rejected": -3.6937179565429688, |
| "loss": 0.5684, |
| "nll_loss": 0.049736388027668, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.12749287486076355, |
| "rewards/margins": 0.9806225895881653, |
| "rewards/rejected": -1.1081154346466064, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.1887910785239172, |
| "grad_norm": 3.721003293991089, |
| "learning_rate": 8.458024691358026e-06, |
| "logits/chosen": 2.294174909591675, |
| "logits/rejected": 2.332521915435791, |
| "logps/chosen": -0.3734773099422455, |
| "logps/rejected": -2.520681858062744, |
| "loss": 0.6118, |
| "nll_loss": 0.03998088836669922, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.11204320192337036, |
| "rewards/margins": 0.6441613435745239, |
| "rewards/rejected": -0.7562046051025391, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.18966917656356333, |
| "grad_norm": 0.09796835482120514, |
| "learning_rate": 8.44567901234568e-06, |
| "logits/chosen": 2.144991397857666, |
| "logits/rejected": 2.220353364944458, |
| "logps/chosen": -0.5873435139656067, |
| "logps/rejected": -3.0226333141326904, |
| "loss": 0.6662, |
| "nll_loss": 0.05847520753741264, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.17620307207107544, |
| "rewards/margins": 0.7305869460105896, |
| "rewards/rejected": -0.9067900776863098, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.19054727460320944, |
| "grad_norm": 5.563838005065918, |
| "learning_rate": 8.433333333333334e-06, |
| "logits/chosen": 2.4212334156036377, |
| "logits/rejected": 2.4930455684661865, |
| "logps/chosen": -1.0709176063537598, |
| "logps/rejected": -3.6808440685272217, |
| "loss": 0.705, |
| "nll_loss": 0.1328948587179184, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.3212752938270569, |
| "rewards/margins": 0.7829779386520386, |
| "rewards/rejected": -1.1042531728744507, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.19142537264285558, |
| "grad_norm": 2.7498562335968018, |
| "learning_rate": 8.420987654320987e-06, |
| "logits/chosen": 2.3793070316314697, |
| "logits/rejected": 2.4112510681152344, |
| "logps/chosen": -0.8520647883415222, |
| "logps/rejected": -3.673933506011963, |
| "loss": 0.6735, |
| "nll_loss": 0.11274605989456177, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.25561946630477905, |
| "rewards/margins": 0.8465606570243835, |
| "rewards/rejected": -1.102180004119873, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.1923034706825017, |
| "grad_norm": 2.2553093433380127, |
| "learning_rate": 8.408641975308642e-06, |
| "logits/chosen": 2.5548908710479736, |
| "logits/rejected": 2.6078009605407715, |
| "logps/chosen": -0.457774817943573, |
| "logps/rejected": -3.5358822345733643, |
| "loss": 0.6123, |
| "nll_loss": 0.05915825441479683, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.13733243942260742, |
| "rewards/margins": 0.9234321713447571, |
| "rewards/rejected": -1.0607647895812988, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.19318156872214784, |
| "grad_norm": 2.100526809692383, |
| "learning_rate": 8.396296296296296e-06, |
| "logits/chosen": 2.438559055328369, |
| "logits/rejected": 2.4799530506134033, |
| "logps/chosen": -0.5425572395324707, |
| "logps/rejected": -2.516019582748413, |
| "loss": 0.6859, |
| "nll_loss": 0.06712070107460022, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.1627671718597412, |
| "rewards/margins": 0.5920388698577881, |
| "rewards/rejected": -0.7548059821128845, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.19405966676179395, |
| "grad_norm": 0.8824114203453064, |
| "learning_rate": 8.383950617283952e-06, |
| "logits/chosen": 2.6323628425598145, |
| "logits/rejected": 2.694988489151001, |
| "logps/chosen": -0.5362733006477356, |
| "logps/rejected": -3.8703293800354004, |
| "loss": 0.5774, |
| "nll_loss": 0.0370684489607811, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.16088199615478516, |
| "rewards/margins": 1.0002167224884033, |
| "rewards/rejected": -1.1610987186431885, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.1949377648014401, |
| "grad_norm": 7.0314717292785645, |
| "learning_rate": 8.371604938271607e-06, |
| "logits/chosen": 2.701634407043457, |
| "logits/rejected": 2.749321222305298, |
| "logps/chosen": -0.914169430732727, |
| "logps/rejected": -2.8599061965942383, |
| "loss": 0.7608, |
| "nll_loss": 0.10523217916488647, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.2742508053779602, |
| "rewards/margins": 0.5837210416793823, |
| "rewards/rejected": -0.8579719662666321, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.1958158628410862, |
| "grad_norm": 0.8926441669464111, |
| "learning_rate": 8.359259259259261e-06, |
| "logits/chosen": 2.576303720474243, |
| "logits/rejected": 2.606104850769043, |
| "logps/chosen": -0.8748048543930054, |
| "logps/rejected": -2.003169536590576, |
| "loss": 0.7253, |
| "nll_loss": 0.07492824643850327, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.2624414563179016, |
| "rewards/margins": 0.33850938081741333, |
| "rewards/rejected": -0.6009508371353149, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.19669396088073232, |
| "grad_norm": 1.0316241979599, |
| "learning_rate": 8.346913580246915e-06, |
| "logits/chosen": 2.5414836406707764, |
| "logits/rejected": 2.6227262020111084, |
| "logps/chosen": -0.6043969988822937, |
| "logps/rejected": -3.0870282649993896, |
| "loss": 0.5888, |
| "nll_loss": 0.04015268385410309, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.18131910264492035, |
| "rewards/margins": 0.7447894215583801, |
| "rewards/rejected": -0.9261085391044617, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.19757205892037846, |
| "grad_norm": 7.171932220458984, |
| "learning_rate": 8.334567901234568e-06, |
| "logits/chosen": 2.5014796257019043, |
| "logits/rejected": 2.4662632942199707, |
| "logps/chosen": -0.5610159039497375, |
| "logps/rejected": -2.8177947998046875, |
| "loss": 0.6317, |
| "nll_loss": 0.06384368985891342, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.16830478608608246, |
| "rewards/margins": 0.6770337224006653, |
| "rewards/rejected": -0.8453385233879089, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.19845015696002458, |
| "grad_norm": 3.9543685913085938, |
| "learning_rate": 8.322222222222223e-06, |
| "logits/chosen": 2.628187656402588, |
| "logits/rejected": 2.585869312286377, |
| "logps/chosen": -0.62028568983078, |
| "logps/rejected": -2.7624683380126953, |
| "loss": 0.6441, |
| "nll_loss": 0.06617014110088348, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.18608573079109192, |
| "rewards/margins": 0.6426547765731812, |
| "rewards/rejected": -0.8287404775619507, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.19932825499967072, |
| "grad_norm": 4.6166815757751465, |
| "learning_rate": 8.309876543209877e-06, |
| "logits/chosen": 2.4966917037963867, |
| "logits/rejected": 2.537562847137451, |
| "logps/chosen": -0.963221549987793, |
| "logps/rejected": -3.5489554405212402, |
| "loss": 0.663, |
| "nll_loss": 0.06778384000062943, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.28896647691726685, |
| "rewards/margins": 0.7757201790809631, |
| "rewards/rejected": -1.06468665599823, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.20020635303931683, |
| "grad_norm": 11.345588684082031, |
| "learning_rate": 8.297530864197531e-06, |
| "logits/chosen": 2.5486679077148438, |
| "logits/rejected": 2.4989380836486816, |
| "logps/chosen": -0.6900479197502136, |
| "logps/rejected": -2.254317045211792, |
| "loss": 0.7252, |
| "nll_loss": 0.07977007329463959, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.20701436698436737, |
| "rewards/margins": 0.4692806601524353, |
| "rewards/rejected": -0.6762951016426086, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.20108445107896297, |
| "grad_norm": 0.1733788251876831, |
| "learning_rate": 8.285185185185186e-06, |
| "logits/chosen": 2.6525139808654785, |
| "logits/rejected": 2.702258586883545, |
| "logps/chosen": -0.6712150573730469, |
| "logps/rejected": -3.3791470527648926, |
| "loss": 0.6285, |
| "nll_loss": 0.06629864871501923, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.20136454701423645, |
| "rewards/margins": 0.8123796582221985, |
| "rewards/rejected": -1.0137441158294678, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.20196254911860909, |
| "grad_norm": 2.2842109203338623, |
| "learning_rate": 8.27283950617284e-06, |
| "logits/chosen": 2.430759906768799, |
| "logits/rejected": 2.509899616241455, |
| "logps/chosen": -0.612421989440918, |
| "logps/rejected": -2.978832960128784, |
| "loss": 0.6656, |
| "nll_loss": 0.0659157857298851, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.18372659385204315, |
| "rewards/margins": 0.7099233865737915, |
| "rewards/rejected": -0.8936498761177063, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.20284064715825523, |
| "grad_norm": 6.801368713378906, |
| "learning_rate": 8.260493827160495e-06, |
| "logits/chosen": 2.424044609069824, |
| "logits/rejected": 2.4345576763153076, |
| "logps/chosen": -0.4499019682407379, |
| "logps/rejected": -3.497096300125122, |
| "loss": 0.5669, |
| "nll_loss": 0.03968087583780289, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.13497060537338257, |
| "rewards/margins": 0.9141584634780884, |
| "rewards/rejected": -1.0491290092468262, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.20371874519790134, |
| "grad_norm": 4.946938514709473, |
| "learning_rate": 8.248148148148149e-06, |
| "logits/chosen": 2.310943841934204, |
| "logits/rejected": 2.327831983566284, |
| "logps/chosen": -0.6226638555526733, |
| "logps/rejected": -5.598433971405029, |
| "loss": 0.4657, |
| "nll_loss": 0.053391944617033005, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.18679918348789215, |
| "rewards/margins": 1.4927312135696411, |
| "rewards/rejected": -1.679530382156372, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.20459684323754748, |
| "grad_norm": 24.650245666503906, |
| "learning_rate": 8.235802469135803e-06, |
| "logits/chosen": 2.0664007663726807, |
| "logits/rejected": 2.158742904663086, |
| "logps/chosen": -2.7977170944213867, |
| "logps/rejected": -6.907550811767578, |
| "loss": 1.5409, |
| "nll_loss": 0.6891128420829773, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.8393152356147766, |
| "rewards/margins": 1.2329500913619995, |
| "rewards/rejected": -2.072265148162842, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.2054749412771936, |
| "grad_norm": 2.8847906589508057, |
| "learning_rate": 8.223456790123458e-06, |
| "logits/chosen": 2.0425117015838623, |
| "logits/rejected": 2.046208381652832, |
| "logps/chosen": -0.8856005668640137, |
| "logps/rejected": -3.2776896953582764, |
| "loss": 0.7465, |
| "nll_loss": 0.10851933062076569, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.2656802237033844, |
| "rewards/margins": 0.717626690864563, |
| "rewards/rejected": -0.983306884765625, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.20635303931683974, |
| "grad_norm": 6.361351013183594, |
| "learning_rate": 8.211111111111112e-06, |
| "logits/chosen": 2.2434327602386475, |
| "logits/rejected": 2.3048999309539795, |
| "logps/chosen": -0.753734290599823, |
| "logps/rejected": -3.09961199760437, |
| "loss": 0.6477, |
| "nll_loss": 0.05653975531458855, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.22612027823925018, |
| "rewards/margins": 0.7037633657455444, |
| "rewards/rejected": -0.929883599281311, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.20723113735648585, |
| "grad_norm": 4.796877384185791, |
| "learning_rate": 8.198765432098767e-06, |
| "logits/chosen": 2.420621156692505, |
| "logits/rejected": 2.454742670059204, |
| "logps/chosen": -0.5336098670959473, |
| "logps/rejected": -2.5250916481018066, |
| "loss": 0.6861, |
| "nll_loss": 0.07172581553459167, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.16008298099040985, |
| "rewards/margins": 0.5974445343017578, |
| "rewards/rejected": -0.7575275897979736, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.208109235396132, |
| "grad_norm": 6.713689804077148, |
| "learning_rate": 8.186419753086421e-06, |
| "logits/chosen": 2.700634241104126, |
| "logits/rejected": 2.761862277984619, |
| "logps/chosen": -0.9955413937568665, |
| "logps/rejected": -3.3551056385040283, |
| "loss": 0.7475, |
| "nll_loss": 0.11970362812280655, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.2986624538898468, |
| "rewards/margins": 0.7078693509101868, |
| "rewards/rejected": -1.0065317153930664, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.2089873334357781, |
| "grad_norm": 5.509604454040527, |
| "learning_rate": 8.174074074074074e-06, |
| "logits/chosen": 2.601839780807495, |
| "logits/rejected": 2.6365180015563965, |
| "logps/chosen": -0.7382031679153442, |
| "logps/rejected": -2.511854648590088, |
| "loss": 0.6877, |
| "nll_loss": 0.058238618075847626, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.2214609682559967, |
| "rewards/margins": 0.5320954918861389, |
| "rewards/rejected": -0.7535563707351685, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.20986543147542422, |
| "grad_norm": 0.9899409413337708, |
| "learning_rate": 8.161728395061728e-06, |
| "logits/chosen": 2.59370493888855, |
| "logits/rejected": 2.653318166732788, |
| "logps/chosen": -0.43372973799705505, |
| "logps/rejected": -2.1165356636047363, |
| "loss": 0.6498, |
| "nll_loss": 0.055299948900938034, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.1301189363002777, |
| "rewards/margins": 0.5048418045043945, |
| "rewards/rejected": -0.6349607706069946, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.21074352951507036, |
| "grad_norm": 2.4443204402923584, |
| "learning_rate": 8.149382716049383e-06, |
| "logits/chosen": 2.621575355529785, |
| "logits/rejected": 2.6399495601654053, |
| "logps/chosen": -0.6197006702423096, |
| "logps/rejected": -2.7705483436584473, |
| "loss": 0.6775, |
| "nll_loss": 0.09681596606969833, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.1859102100133896, |
| "rewards/margins": 0.6452543139457703, |
| "rewards/rejected": -0.831164538860321, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.21162162755471647, |
| "grad_norm": 3.302237033843994, |
| "learning_rate": 8.137037037037037e-06, |
| "logits/chosen": 2.6685478687286377, |
| "logits/rejected": 2.645535469055176, |
| "logps/chosen": -0.5314685106277466, |
| "logps/rejected": -2.784475803375244, |
| "loss": 0.6021, |
| "nll_loss": 0.05336238071322441, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.1594405472278595, |
| "rewards/margins": 0.675902247428894, |
| "rewards/rejected": -0.8353427052497864, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.21249972559436262, |
| "grad_norm": 1.3908727169036865, |
| "learning_rate": 8.124691358024692e-06, |
| "logits/chosen": 2.372706413269043, |
| "logits/rejected": 2.4324469566345215, |
| "logps/chosen": -0.5971062779426575, |
| "logps/rejected": -2.159764051437378, |
| "loss": 0.6943, |
| "nll_loss": 0.05865710228681564, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.179131880402565, |
| "rewards/margins": 0.4687972664833069, |
| "rewards/rejected": -0.6479291915893555, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.21337782363400873, |
| "grad_norm": 2.2388875484466553, |
| "learning_rate": 8.112345679012346e-06, |
| "logits/chosen": 2.378962993621826, |
| "logits/rejected": 2.4710259437561035, |
| "logps/chosen": -0.6657778024673462, |
| "logps/rejected": -2.7295007705688477, |
| "loss": 0.6557, |
| "nll_loss": 0.07417738437652588, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.19973333179950714, |
| "rewards/margins": 0.6191169619560242, |
| "rewards/rejected": -0.8188502192497253, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.21425592167365487, |
| "grad_norm": 11.22681713104248, |
| "learning_rate": 8.1e-06, |
| "logits/chosen": 2.5654773712158203, |
| "logits/rejected": 2.5966382026672363, |
| "logps/chosen": -0.5373214483261108, |
| "logps/rejected": -2.4600396156311035, |
| "loss": 0.6436, |
| "nll_loss": 0.07008077204227448, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.16119642555713654, |
| "rewards/margins": 0.5768154263496399, |
| "rewards/rejected": -0.7380119562149048, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.21513401971330098, |
| "grad_norm": 3.76454496383667, |
| "learning_rate": 8.087654320987655e-06, |
| "logits/chosen": 2.541652202606201, |
| "logits/rejected": 2.5319390296936035, |
| "logps/chosen": -0.7174406051635742, |
| "logps/rejected": -2.493823528289795, |
| "loss": 0.694, |
| "nll_loss": 0.08293718844652176, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.21523217856884003, |
| "rewards/margins": 0.5329148173332214, |
| "rewards/rejected": -0.7481471300125122, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.21601211775294712, |
| "grad_norm": 6.552890777587891, |
| "learning_rate": 8.07530864197531e-06, |
| "logits/chosen": 2.4907002449035645, |
| "logits/rejected": 2.483581066131592, |
| "logps/chosen": -0.5098174214363098, |
| "logps/rejected": -2.033862829208374, |
| "loss": 0.6884, |
| "nll_loss": 0.06638985127210617, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.15294523537158966, |
| "rewards/margins": 0.4572136402130127, |
| "rewards/rejected": -0.6101589202880859, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.21689021579259324, |
| "grad_norm": 1.994023323059082, |
| "learning_rate": 8.062962962962964e-06, |
| "logits/chosen": 2.6207573413848877, |
| "logits/rejected": 2.5812675952911377, |
| "logps/chosen": -0.7166529297828674, |
| "logps/rejected": -2.042680263519287, |
| "loss": 0.673, |
| "nll_loss": 0.04922042042016983, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.21499589085578918, |
| "rewards/margins": 0.3978081941604614, |
| "rewards/rejected": -0.6128040552139282, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.21776831383223938, |
| "grad_norm": 1.6430639028549194, |
| "learning_rate": 8.050617283950618e-06, |
| "logits/chosen": 2.2188925743103027, |
| "logits/rejected": 2.256579875946045, |
| "logps/chosen": -0.4708133637905121, |
| "logps/rejected": -1.9442718029022217, |
| "loss": 0.643, |
| "nll_loss": 0.043398790061473846, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.14124402403831482, |
| "rewards/margins": 0.44203758239746094, |
| "rewards/rejected": -0.5832816362380981, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.2186464118718855, |
| "grad_norm": 2.5793867111206055, |
| "learning_rate": 8.038271604938272e-06, |
| "logits/chosen": 2.5893726348876953, |
| "logits/rejected": 2.5955262184143066, |
| "logps/chosen": -0.9521909952163696, |
| "logps/rejected": -2.2524361610412598, |
| "loss": 0.7949, |
| "nll_loss": 0.09755026549100876, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.28565728664398193, |
| "rewards/margins": 0.3900734782218933, |
| "rewards/rejected": -0.6757307648658752, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.21952450991153163, |
| "grad_norm": 3.0991997718811035, |
| "learning_rate": 8.025925925925927e-06, |
| "logits/chosen": 2.4973714351654053, |
| "logits/rejected": 2.5837717056274414, |
| "logps/chosen": -0.7727764248847961, |
| "logps/rejected": -1.6810334920883179, |
| "loss": 0.7202, |
| "nll_loss": 0.08514519035816193, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.23183290660381317, |
| "rewards/margins": 0.2724771797657013, |
| "rewards/rejected": -0.5043100714683533, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.22040260795117775, |
| "grad_norm": 0.889065682888031, |
| "learning_rate": 8.01358024691358e-06, |
| "logits/chosen": 2.623380422592163, |
| "logits/rejected": 2.6181862354278564, |
| "logps/chosen": -0.6605237126350403, |
| "logps/rejected": -2.3106017112731934, |
| "loss": 0.6626, |
| "nll_loss": 0.056938063353300095, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.19815710186958313, |
| "rewards/margins": 0.4950234889984131, |
| "rewards/rejected": -0.6931806206703186, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.2212807059908239, |
| "grad_norm": 1.9629164934158325, |
| "learning_rate": 8.001234567901234e-06, |
| "logits/chosen": 2.725886583328247, |
| "logits/rejected": 2.8111538887023926, |
| "logps/chosen": -0.9530594944953918, |
| "logps/rejected": -2.9670989513397217, |
| "loss": 0.6788, |
| "nll_loss": 0.08854852616786957, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.28591784834861755, |
| "rewards/margins": 0.6042118072509766, |
| "rewards/rejected": -0.8901296854019165, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.22215880403047, |
| "grad_norm": 2.8490424156188965, |
| "learning_rate": 7.98888888888889e-06, |
| "logits/chosen": 2.6199440956115723, |
| "logits/rejected": 2.5510783195495605, |
| "logps/chosen": -0.7600888013839722, |
| "logps/rejected": -2.3557190895080566, |
| "loss": 0.6933, |
| "nll_loss": 0.07890000194311142, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.22802665829658508, |
| "rewards/margins": 0.47868919372558594, |
| "rewards/rejected": -0.7067158222198486, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.22303690207011612, |
| "grad_norm": 2.2429914474487305, |
| "learning_rate": 7.976543209876545e-06, |
| "logits/chosen": 2.657155752182007, |
| "logits/rejected": 2.730384349822998, |
| "logps/chosen": -0.6944109201431274, |
| "logps/rejected": -2.3378891944885254, |
| "loss": 0.6597, |
| "nll_loss": 0.07231010496616364, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.20832328498363495, |
| "rewards/margins": 0.4930434226989746, |
| "rewards/rejected": -0.7013667821884155, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.22391500010976226, |
| "grad_norm": 8.76103401184082, |
| "learning_rate": 7.964197530864199e-06, |
| "logits/chosen": 2.425436019897461, |
| "logits/rejected": 2.432227849960327, |
| "logps/chosen": -0.8007850646972656, |
| "logps/rejected": -1.2312095165252686, |
| "loss": 0.8001, |
| "nll_loss": 0.08954410254955292, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.24023552238941193, |
| "rewards/margins": 0.1291273534297943, |
| "rewards/rejected": -0.36936289072036743, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.22479309814940837, |
| "grad_norm": 0.9743072986602783, |
| "learning_rate": 7.951851851851853e-06, |
| "logits/chosen": 2.5851752758026123, |
| "logits/rejected": 2.6295394897460938, |
| "logps/chosen": -0.5336810946464539, |
| "logps/rejected": -1.8357422351837158, |
| "loss": 0.6645, |
| "nll_loss": 0.0451740063726902, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.16010431945323944, |
| "rewards/margins": 0.39061832427978516, |
| "rewards/rejected": -0.5507226586341858, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.2256711961890545, |
| "grad_norm": 16.18039321899414, |
| "learning_rate": 7.939506172839508e-06, |
| "logits/chosen": 2.548182487487793, |
| "logits/rejected": 2.511976718902588, |
| "logps/chosen": -0.34628647565841675, |
| "logps/rejected": -1.9476335048675537, |
| "loss": 0.6362, |
| "nll_loss": 0.043379928916692734, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.1038859486579895, |
| "rewards/margins": 0.4804041385650635, |
| "rewards/rejected": -0.584290087223053, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.22654929422870063, |
| "grad_norm": 0.8062020540237427, |
| "learning_rate": 7.92716049382716e-06, |
| "logits/chosen": 2.4778008460998535, |
| "logits/rejected": 2.540804147720337, |
| "logps/chosen": -0.5255548357963562, |
| "logps/rejected": -2.09405779838562, |
| "loss": 0.6816, |
| "nll_loss": 0.07233087718486786, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.15766644477844238, |
| "rewards/margins": 0.4705510139465332, |
| "rewards/rejected": -0.6282175183296204, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.22742739226834677, |
| "grad_norm": 0.9400202631950378, |
| "learning_rate": 7.914814814814815e-06, |
| "logits/chosen": 2.3705923557281494, |
| "logits/rejected": 2.3822696208953857, |
| "logps/chosen": -0.29244324564933777, |
| "logps/rejected": -1.8687235116958618, |
| "loss": 0.643, |
| "nll_loss": 0.0392463319003582, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.08773298561573029, |
| "rewards/margins": 0.47288402915000916, |
| "rewards/rejected": -0.5606169700622559, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.22830549030799288, |
| "grad_norm": 2.667971134185791, |
| "learning_rate": 7.90246913580247e-06, |
| "logits/chosen": 2.3539376258850098, |
| "logits/rejected": 2.370870351791382, |
| "logps/chosen": -0.8493935465812683, |
| "logps/rejected": -2.638115882873535, |
| "loss": 0.7404, |
| "nll_loss": 0.10912100225687027, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.2548181116580963, |
| "rewards/margins": 0.5366166830062866, |
| "rewards/rejected": -0.7914347648620605, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.22918358834763902, |
| "grad_norm": 2.8281736373901367, |
| "learning_rate": 7.890123456790124e-06, |
| "logits/chosen": 2.146486759185791, |
| "logits/rejected": 2.068389415740967, |
| "logps/chosen": -0.5109766125679016, |
| "logps/rejected": -1.839223861694336, |
| "loss": 0.6687, |
| "nll_loss": 0.056281328201293945, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.15329298377037048, |
| "rewards/margins": 0.39847415685653687, |
| "rewards/rejected": -0.5517671704292297, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.23006168638728514, |
| "grad_norm": 0.8495587706565857, |
| "learning_rate": 7.877777777777778e-06, |
| "logits/chosen": 2.6396515369415283, |
| "logits/rejected": 2.7024216651916504, |
| "logps/chosen": -0.6976840496063232, |
| "logps/rejected": -2.1378865242004395, |
| "loss": 0.6814, |
| "nll_loss": 0.056956302374601364, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.20930524170398712, |
| "rewards/margins": 0.43206077814102173, |
| "rewards/rejected": -0.6413660049438477, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.23093978442693128, |
| "grad_norm": 3.5757088661193848, |
| "learning_rate": 7.865432098765433e-06, |
| "logits/chosen": 2.367114305496216, |
| "logits/rejected": 2.4596433639526367, |
| "logps/chosen": -0.8233789205551147, |
| "logps/rejected": -3.531553268432617, |
| "loss": 0.6048, |
| "nll_loss": 0.07218165695667267, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.24701371788978577, |
| "rewards/margins": 0.8124523162841797, |
| "rewards/rejected": -1.059466004371643, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.2318178824665774, |
| "grad_norm": 1.7588326930999756, |
| "learning_rate": 7.853086419753087e-06, |
| "logits/chosen": 2.2890639305114746, |
| "logits/rejected": 2.2860817909240723, |
| "logps/chosen": -0.31422901153564453, |
| "logps/rejected": -2.4267849922180176, |
| "loss": 0.584, |
| "nll_loss": 0.03038870170712471, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.09426870942115784, |
| "rewards/margins": 0.6337667107582092, |
| "rewards/rejected": -0.7280355095863342, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.23269598050622353, |
| "grad_norm": 4.534848213195801, |
| "learning_rate": 7.840740740740741e-06, |
| "logits/chosen": 2.2893667221069336, |
| "logits/rejected": 2.3961310386657715, |
| "logps/chosen": -1.1099700927734375, |
| "logps/rejected": -3.46467924118042, |
| "loss": 0.7172, |
| "nll_loss": 0.1096486896276474, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.3329910337924957, |
| "rewards/margins": 0.7064129114151001, |
| "rewards/rejected": -1.0394039154052734, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.23357407854586965, |
| "grad_norm": 4.673385143280029, |
| "learning_rate": 7.828395061728396e-06, |
| "logits/chosen": 2.296675205230713, |
| "logits/rejected": 2.2739694118499756, |
| "logps/chosen": -0.5930169820785522, |
| "logps/rejected": -2.1158299446105957, |
| "loss": 0.6747, |
| "nll_loss": 0.06488887220621109, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.1779050976037979, |
| "rewards/margins": 0.4568440020084381, |
| "rewards/rejected": -0.6347490549087524, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.23445217658551576, |
| "grad_norm": 4.619322776794434, |
| "learning_rate": 7.81604938271605e-06, |
| "logits/chosen": 2.316483974456787, |
| "logits/rejected": 2.2958438396453857, |
| "logps/chosen": -0.8736156225204468, |
| "logps/rejected": -2.8213837146759033, |
| "loss": 0.7145, |
| "nll_loss": 0.0816427692770958, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.2620847225189209, |
| "rewards/margins": 0.5843304395675659, |
| "rewards/rejected": -0.8464152216911316, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.2353302746251619, |
| "grad_norm": 2.9810938835144043, |
| "learning_rate": 7.803703703703705e-06, |
| "logits/chosen": 2.459575891494751, |
| "logits/rejected": 2.5659358501434326, |
| "logps/chosen": -0.7047882080078125, |
| "logps/rejected": -2.6978650093078613, |
| "loss": 0.6257, |
| "nll_loss": 0.048231981694698334, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.21143648028373718, |
| "rewards/margins": 0.5979229807853699, |
| "rewards/rejected": -0.8093594312667847, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.23620837266480801, |
| "grad_norm": 1.5284343957901, |
| "learning_rate": 7.791358024691359e-06, |
| "logits/chosen": 2.327259063720703, |
| "logits/rejected": 2.342414379119873, |
| "logps/chosen": -0.630928635597229, |
| "logps/rejected": -1.5021655559539795, |
| "loss": 0.749, |
| "nll_loss": 0.08067157119512558, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.18927858769893646, |
| "rewards/margins": 0.26137107610702515, |
| "rewards/rejected": -0.4506497383117676, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.23708647070445416, |
| "grad_norm": 2.211660385131836, |
| "learning_rate": 7.779012345679014e-06, |
| "logits/chosen": 2.6317856311798096, |
| "logits/rejected": 2.615809679031372, |
| "logps/chosen": -0.5954752564430237, |
| "logps/rejected": -2.3459115028381348, |
| "loss": 0.6778, |
| "nll_loss": 0.08433017879724503, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.17864257097244263, |
| "rewards/margins": 0.5251308679580688, |
| "rewards/rejected": -0.7037734389305115, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.23796456874410027, |
| "grad_norm": 1.088346004486084, |
| "learning_rate": 7.766666666666666e-06, |
| "logits/chosen": 2.3896658420562744, |
| "logits/rejected": 2.4483752250671387, |
| "logps/chosen": -0.9959812164306641, |
| "logps/rejected": -2.1024794578552246, |
| "loss": 0.7702, |
| "nll_loss": 0.07456602156162262, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.29879438877105713, |
| "rewards/margins": 0.33194953203201294, |
| "rewards/rejected": -0.6307438611984253, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.2388426667837464, |
| "grad_norm": 0.024926647543907166, |
| "learning_rate": 7.75432098765432e-06, |
| "logits/chosen": 2.4478306770324707, |
| "logits/rejected": 2.464536190032959, |
| "logps/chosen": -0.5027719736099243, |
| "logps/rejected": -2.434457540512085, |
| "loss": 0.6162, |
| "nll_loss": 0.037077441811561584, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.15083159506320953, |
| "rewards/margins": 0.5795056819915771, |
| "rewards/rejected": -0.730337381362915, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.23972076482339252, |
| "grad_norm": 2.2997021675109863, |
| "learning_rate": 7.741975308641975e-06, |
| "logits/chosen": 2.381772518157959, |
| "logits/rejected": 2.4063820838928223, |
| "logps/chosen": -0.2956869602203369, |
| "logps/rejected": -3.0764455795288086, |
| "loss": 0.5462, |
| "nll_loss": 0.0379708856344223, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.08870609104633331, |
| "rewards/margins": 0.834227442741394, |
| "rewards/rejected": -0.9229336977005005, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.24059886286303866, |
| "grad_norm": 0.1084788367152214, |
| "learning_rate": 7.72962962962963e-06, |
| "logits/chosen": 2.582411766052246, |
| "logits/rejected": 2.6022956371307373, |
| "logps/chosen": -0.8210613131523132, |
| "logps/rejected": -1.958987832069397, |
| "loss": 0.7403, |
| "nll_loss": 0.07586108148097992, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.24631838500499725, |
| "rewards/margins": 0.34137797355651855, |
| "rewards/rejected": -0.5876964330673218, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.24147696090268478, |
| "grad_norm": 3.7977356910705566, |
| "learning_rate": 7.717283950617284e-06, |
| "logits/chosen": 2.4403786659240723, |
| "logits/rejected": 2.398824453353882, |
| "logps/chosen": -0.5754778981208801, |
| "logps/rejected": -2.9741971492767334, |
| "loss": 0.631, |
| "nll_loss": 0.06641169637441635, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.17264336347579956, |
| "rewards/margins": 0.7196158170700073, |
| "rewards/rejected": -0.8922592401504517, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.24235505894233092, |
| "grad_norm": 1.7483222484588623, |
| "learning_rate": 7.70493827160494e-06, |
| "logits/chosen": 2.11322021484375, |
| "logits/rejected": 2.1725521087646484, |
| "logps/chosen": -0.5592783689498901, |
| "logps/rejected": -1.9913737773895264, |
| "loss": 0.671, |
| "nll_loss": 0.07330699265003204, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.16778354346752167, |
| "rewards/margins": 0.4296286106109619, |
| "rewards/rejected": -0.5974121689796448, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.24323315698197703, |
| "grad_norm": 1.0311236381530762, |
| "learning_rate": 7.692592592592594e-06, |
| "logits/chosen": 2.309854507446289, |
| "logits/rejected": 2.313572406768799, |
| "logps/chosen": -0.6930335760116577, |
| "logps/rejected": -2.353086233139038, |
| "loss": 0.7228, |
| "nll_loss": 0.0806727483868599, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.20791009068489075, |
| "rewards/margins": 0.498015820980072, |
| "rewards/rejected": -0.7059258818626404, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.24411125502162317, |
| "grad_norm": 4.491925239562988, |
| "learning_rate": 7.680246913580247e-06, |
| "logits/chosen": 2.534341335296631, |
| "logits/rejected": 2.560044050216675, |
| "logps/chosen": -0.536239743232727, |
| "logps/rejected": -2.622220993041992, |
| "loss": 0.6372, |
| "nll_loss": 0.05687220022082329, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.1608719378709793, |
| "rewards/margins": 0.625794529914856, |
| "rewards/rejected": -0.7866664528846741, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.2449893530612693, |
| "grad_norm": 0.7264915108680725, |
| "learning_rate": 7.667901234567902e-06, |
| "logits/chosen": 2.2637343406677246, |
| "logits/rejected": 2.312181234359741, |
| "logps/chosen": -0.7516659498214722, |
| "logps/rejected": -1.9516077041625977, |
| "loss": 0.7264, |
| "nll_loss": 0.07927460223436356, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.22549979388713837, |
| "rewards/margins": 0.3599824607372284, |
| "rewards/rejected": -0.5854822993278503, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.24586745110091543, |
| "grad_norm": 3.0847902297973633, |
| "learning_rate": 7.655555555555556e-06, |
| "logits/chosen": 2.367601156234741, |
| "logits/rejected": 2.403787612915039, |
| "logps/chosen": -0.41557034850120544, |
| "logps/rejected": -2.6111361980438232, |
| "loss": 0.6177, |
| "nll_loss": 0.04188116267323494, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.12467111647129059, |
| "rewards/margins": 0.6586698889732361, |
| "rewards/rejected": -0.7833409905433655, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.24674554914056154, |
| "grad_norm": 4.381664752960205, |
| "learning_rate": 7.64320987654321e-06, |
| "logits/chosen": 2.159615993499756, |
| "logits/rejected": 2.1967501640319824, |
| "logps/chosen": -0.8560575246810913, |
| "logps/rejected": -2.5341227054595947, |
| "loss": 0.6988, |
| "nll_loss": 0.0791650265455246, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.2568172812461853, |
| "rewards/margins": 0.5034195780754089, |
| "rewards/rejected": -0.7602368593215942, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.24762364718020766, |
| "grad_norm": 1.5694470405578613, |
| "learning_rate": 7.630864197530865e-06, |
| "logits/chosen": 2.573787212371826, |
| "logits/rejected": 2.6199076175689697, |
| "logps/chosen": -0.3929263949394226, |
| "logps/rejected": -2.310509443283081, |
| "loss": 0.5938, |
| "nll_loss": 0.04064936563372612, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.11787792295217514, |
| "rewards/margins": 0.5752750039100647, |
| "rewards/rejected": -0.6931529641151428, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.2485017452198538, |
| "grad_norm": 0.7673569321632385, |
| "learning_rate": 7.618518518518519e-06, |
| "logits/chosen": 2.1453781127929688, |
| "logits/rejected": 2.2463371753692627, |
| "logps/chosen": -0.40726566314697266, |
| "logps/rejected": -2.0054023265838623, |
| "loss": 0.658, |
| "nll_loss": 0.048675037920475006, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.12217970192432404, |
| "rewards/margins": 0.4794410765171051, |
| "rewards/rejected": -0.6016206741333008, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.2493798432594999, |
| "grad_norm": 0.3016711473464966, |
| "learning_rate": 7.606172839506173e-06, |
| "logits/chosen": 2.3802428245544434, |
| "logits/rejected": 2.4284090995788574, |
| "logps/chosen": -0.2703506350517273, |
| "logps/rejected": -2.1822762489318848, |
| "loss": 0.6137, |
| "nll_loss": 0.03810378909111023, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.08110519498586655, |
| "rewards/margins": 0.5735777020454407, |
| "rewards/rejected": -0.6546828746795654, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.250257941299146, |
| "grad_norm": 7.489548683166504, |
| "learning_rate": 7.593827160493827e-06, |
| "logits/chosen": 2.248429298400879, |
| "logits/rejected": 2.308821678161621, |
| "logps/chosen": -0.8789108991622925, |
| "logps/rejected": -3.1246941089630127, |
| "loss": 0.664, |
| "nll_loss": 0.07486443221569061, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.2636732757091522, |
| "rewards/margins": 0.6737349033355713, |
| "rewards/rejected": -0.9374082684516907, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.2511360393387922, |
| "grad_norm": 1.4026418924331665, |
| "learning_rate": 7.581481481481482e-06, |
| "logits/chosen": 2.4665749073028564, |
| "logits/rejected": 2.4932830333709717, |
| "logps/chosen": -0.6312376260757446, |
| "logps/rejected": -2.414559841156006, |
| "loss": 0.6769, |
| "nll_loss": 0.043715715408325195, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.18937130272388458, |
| "rewards/margins": 0.5349966883659363, |
| "rewards/rejected": -0.7243679761886597, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.2520141373784383, |
| "grad_norm": 6.41206169128418, |
| "learning_rate": 7.569135802469136e-06, |
| "logits/chosen": 1.9791500568389893, |
| "logits/rejected": 2.065732479095459, |
| "logps/chosen": -0.3990306854248047, |
| "logps/rejected": -2.8924102783203125, |
| "loss": 0.5879, |
| "nll_loss": 0.03257184475660324, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.11970920860767365, |
| "rewards/margins": 0.7480138540267944, |
| "rewards/rejected": -0.8677231073379517, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.2528922354180844, |
| "grad_norm": 0.007237335667014122, |
| "learning_rate": 7.5567901234567905e-06, |
| "logits/chosen": 2.199582099914551, |
| "logits/rejected": 2.2399208545684814, |
| "logps/chosen": -0.5396376848220825, |
| "logps/rejected": -2.956026554107666, |
| "loss": 0.6215, |
| "nll_loss": 0.05001888796687126, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.16189131140708923, |
| "rewards/margins": 0.7249167561531067, |
| "rewards/rejected": -0.8868080377578735, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.25377033345773053, |
| "grad_norm": 1.490009069442749, |
| "learning_rate": 7.544444444444445e-06, |
| "logits/chosen": 2.2380213737487793, |
| "logits/rejected": 2.329550266265869, |
| "logps/chosen": -0.5914583206176758, |
| "logps/rejected": -3.31396484375, |
| "loss": 0.6415, |
| "nll_loss": 0.07680721580982208, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.17743751406669617, |
| "rewards/margins": 0.8167519569396973, |
| "rewards/rejected": -0.9941895604133606, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.2546484314973767, |
| "grad_norm": 4.014424800872803, |
| "learning_rate": 7.5320987654321e-06, |
| "logits/chosen": 1.9832671880722046, |
| "logits/rejected": 2.1117234230041504, |
| "logps/chosen": -0.7208765745162964, |
| "logps/rejected": -2.1712794303894043, |
| "loss": 0.7748, |
| "nll_loss": 0.10638797283172607, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.21626298129558563, |
| "rewards/margins": 0.43512091040611267, |
| "rewards/rejected": -0.6513839364051819, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.2555265295370228, |
| "grad_norm": 0.9837947487831116, |
| "learning_rate": 7.519753086419753e-06, |
| "logits/chosen": 2.266629695892334, |
| "logits/rejected": 2.3368468284606934, |
| "logps/chosen": -0.5821598172187805, |
| "logps/rejected": -3.631063938140869, |
| "loss": 0.622, |
| "nll_loss": 0.04266344755887985, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.1746479570865631, |
| "rewards/margins": 0.9146712422370911, |
| "rewards/rejected": -1.0893189907073975, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.25640462757666893, |
| "grad_norm": 7.946182727813721, |
| "learning_rate": 7.507407407407407e-06, |
| "logits/chosen": 2.081207275390625, |
| "logits/rejected": 2.123415946960449, |
| "logps/chosen": -0.6722933053970337, |
| "logps/rejected": -3.213026762008667, |
| "loss": 0.6752, |
| "nll_loss": 0.09485231339931488, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.2016880065202713, |
| "rewards/margins": 0.762220025062561, |
| "rewards/rejected": -0.9639080762863159, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.25728272561631504, |
| "grad_norm": 3.1857895851135254, |
| "learning_rate": 7.495061728395062e-06, |
| "logits/chosen": 2.0671546459198, |
| "logits/rejected": 2.1429953575134277, |
| "logps/chosen": -1.1341451406478882, |
| "logps/rejected": -3.0532522201538086, |
| "loss": 0.7432, |
| "nll_loss": 0.07530729472637177, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.34024354815483093, |
| "rewards/margins": 0.5757321119308472, |
| "rewards/rejected": -0.9159756898880005, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.2581608236559612, |
| "grad_norm": 5.178243637084961, |
| "learning_rate": 7.482716049382717e-06, |
| "logits/chosen": 2.082383394241333, |
| "logits/rejected": 2.076977252960205, |
| "logps/chosen": -0.3990221321582794, |
| "logps/rejected": -1.7076250314712524, |
| "loss": 0.6855, |
| "nll_loss": 0.06153715401887894, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.1197066530585289, |
| "rewards/margins": 0.39258089661598206, |
| "rewards/rejected": -0.5122874975204468, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.2590389216956073, |
| "grad_norm": 1.9402194023132324, |
| "learning_rate": 7.4703703703703715e-06, |
| "logits/chosen": 2.0955326557159424, |
| "logits/rejected": 2.119300603866577, |
| "logps/chosen": -0.48326557874679565, |
| "logps/rejected": -2.141892194747925, |
| "loss": 0.6575, |
| "nll_loss": 0.05360071733593941, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.14497968554496765, |
| "rewards/margins": 0.49758806824684143, |
| "rewards/rejected": -0.6425677537918091, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.25991701973525344, |
| "grad_norm": 2.156696081161499, |
| "learning_rate": 7.458024691358026e-06, |
| "logits/chosen": 2.2110159397125244, |
| "logits/rejected": 2.210599422454834, |
| "logps/chosen": -0.7016893625259399, |
| "logps/rejected": -1.9251207113265991, |
| "loss": 0.7172, |
| "nll_loss": 0.06567586958408356, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.21050682663917542, |
| "rewards/margins": 0.36702945828437805, |
| "rewards/rejected": -0.5775362253189087, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.26079511777489955, |
| "grad_norm": 4.643383026123047, |
| "learning_rate": 7.44567901234568e-06, |
| "logits/chosen": 2.3343098163604736, |
| "logits/rejected": 2.3111183643341064, |
| "logps/chosen": -0.546606183052063, |
| "logps/rejected": -1.9335724115371704, |
| "loss": 0.6684, |
| "nll_loss": 0.04829854518175125, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.1639818698167801, |
| "rewards/margins": 0.4160899519920349, |
| "rewards/rejected": -0.5800718069076538, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.26167321581454567, |
| "grad_norm": 3.739795684814453, |
| "learning_rate": 7.433333333333334e-06, |
| "logits/chosen": 2.259247303009033, |
| "logits/rejected": 2.3139965534210205, |
| "logps/chosen": -0.6394690871238708, |
| "logps/rejected": -3.2223987579345703, |
| "loss": 0.6667, |
| "nll_loss": 0.07620217651128769, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.1918407380580902, |
| "rewards/margins": 0.774878978729248, |
| "rewards/rejected": -0.9667198061943054, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.26255131385419184, |
| "grad_norm": 3.2304673194885254, |
| "learning_rate": 7.420987654320988e-06, |
| "logits/chosen": 2.221369504928589, |
| "logits/rejected": 2.2490549087524414, |
| "logps/chosen": -0.3039132356643677, |
| "logps/rejected": -3.022956371307373, |
| "loss": 0.5563, |
| "nll_loss": 0.030767951160669327, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.09117396920919418, |
| "rewards/margins": 0.8157129287719727, |
| "rewards/rejected": -0.9068870544433594, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.26342941189383795, |
| "grad_norm": 1.1860368251800537, |
| "learning_rate": 7.408641975308643e-06, |
| "logits/chosen": 2.0464189052581787, |
| "logits/rejected": 2.0965323448181152, |
| "logps/chosen": -0.32864516973495483, |
| "logps/rejected": -2.536147356033325, |
| "loss": 0.617, |
| "nll_loss": 0.044617362320423126, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.09859354794025421, |
| "rewards/margins": 0.6622506380081177, |
| "rewards/rejected": -0.7608442306518555, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.26430750993348406, |
| "grad_norm": 1.1873282194137573, |
| "learning_rate": 7.396296296296297e-06, |
| "logits/chosen": 2.152050256729126, |
| "logits/rejected": 2.2446908950805664, |
| "logps/chosen": -0.4739529490470886, |
| "logps/rejected": -3.3722312450408936, |
| "loss": 0.6149, |
| "nll_loss": 0.06702496111392975, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.14218589663505554, |
| "rewards/margins": 0.8694835901260376, |
| "rewards/rejected": -1.011669397354126, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.2651856079731302, |
| "grad_norm": 4.482595920562744, |
| "learning_rate": 7.3839506172839516e-06, |
| "logits/chosen": 2.094207286834717, |
| "logits/rejected": 2.200845241546631, |
| "logps/chosen": -1.2236783504486084, |
| "logps/rejected": -4.37764835357666, |
| "loss": 0.6561, |
| "nll_loss": 0.09786146134138107, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -0.3671035170555115, |
| "rewards/margins": 0.9461910128593445, |
| "rewards/rejected": -1.3132946491241455, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.26606370601277635, |
| "grad_norm": 0.8290062546730042, |
| "learning_rate": 7.371604938271606e-06, |
| "logits/chosen": 1.9782556295394897, |
| "logits/rejected": 2.009742021560669, |
| "logps/chosen": -0.425149142742157, |
| "logps/rejected": -3.8111705780029297, |
| "loss": 0.5745, |
| "nll_loss": 0.042985234409570694, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.12754476070404053, |
| "rewards/margins": 1.0158064365386963, |
| "rewards/rejected": -1.1433511972427368, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.26694180405242246, |
| "grad_norm": 7.664605140686035, |
| "learning_rate": 7.3592592592592595e-06, |
| "logits/chosen": 2.083406448364258, |
| "logits/rejected": 2.1995325088500977, |
| "logps/chosen": -1.0140199661254883, |
| "logps/rejected": -3.976250410079956, |
| "loss": 0.759, |
| "nll_loss": 0.07868107408285141, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.3042060136795044, |
| "rewards/margins": 0.8886691927909851, |
| "rewards/rejected": -1.1928752660751343, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.2678199020920686, |
| "grad_norm": 5.215522766113281, |
| "learning_rate": 7.346913580246914e-06, |
| "logits/chosen": 2.006986141204834, |
| "logits/rejected": 2.1079816818237305, |
| "logps/chosen": -0.6254408955574036, |
| "logps/rejected": -2.870253801345825, |
| "loss": 0.6843, |
| "nll_loss": 0.07333754748106003, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.1876322478055954, |
| "rewards/margins": 0.6734437346458435, |
| "rewards/rejected": -0.8610760569572449, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.2686980001317147, |
| "grad_norm": 1.4749705791473389, |
| "learning_rate": 7.334567901234568e-06, |
| "logits/chosen": 2.1230270862579346, |
| "logits/rejected": 2.2760751247406006, |
| "logps/chosen": -0.9829801321029663, |
| "logps/rejected": -2.8155903816223145, |
| "loss": 0.7342, |
| "nll_loss": 0.10188277065753937, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.2948940396308899, |
| "rewards/margins": 0.5497831702232361, |
| "rewards/rejected": -0.8446771502494812, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.26957609817136086, |
| "grad_norm": 9.571310043334961, |
| "learning_rate": 7.322222222222223e-06, |
| "logits/chosen": 2.225337028503418, |
| "logits/rejected": 2.2470784187316895, |
| "logps/chosen": -0.7389670014381409, |
| "logps/rejected": -2.8324790000915527, |
| "loss": 0.6649, |
| "nll_loss": 0.0714489072561264, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.2216901034116745, |
| "rewards/margins": 0.628053605556488, |
| "rewards/rejected": -0.8497437238693237, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.27045419621100697, |
| "grad_norm": 1.4797980785369873, |
| "learning_rate": 7.309876543209877e-06, |
| "logits/chosen": 2.0399069786071777, |
| "logits/rejected": 2.1479554176330566, |
| "logps/chosen": -0.7341340780258179, |
| "logps/rejected": -2.218003749847412, |
| "loss": 0.7062, |
| "nll_loss": 0.057169754058122635, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.22024023532867432, |
| "rewards/margins": 0.4451608657836914, |
| "rewards/rejected": -0.6654011011123657, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.2713322942506531, |
| "grad_norm": 11.532062530517578, |
| "learning_rate": 7.297530864197532e-06, |
| "logits/chosen": 2.1539671421051025, |
| "logits/rejected": 2.135166645050049, |
| "logps/chosen": -0.9680719375610352, |
| "logps/rejected": -2.85876202583313, |
| "loss": 0.7423, |
| "nll_loss": 0.0898386538028717, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.2904215455055237, |
| "rewards/margins": 0.5672070980072021, |
| "rewards/rejected": -0.8576286435127258, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.2722103922902992, |
| "grad_norm": 0.5979923605918884, |
| "learning_rate": 7.285185185185186e-06, |
| "logits/chosen": 2.2366061210632324, |
| "logits/rejected": 2.297550916671753, |
| "logps/chosen": -0.9880453944206238, |
| "logps/rejected": -3.887558698654175, |
| "loss": 0.6517, |
| "nll_loss": 0.08710993081331253, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.2964136004447937, |
| "rewards/margins": 0.8698541522026062, |
| "rewards/rejected": -1.1662677526474, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.27308849032994537, |
| "grad_norm": 6.401426792144775, |
| "learning_rate": 7.27283950617284e-06, |
| "logits/chosen": 2.1483216285705566, |
| "logits/rejected": 2.219287395477295, |
| "logps/chosen": -0.8323481678962708, |
| "logps/rejected": -1.9748185873031616, |
| "loss": 0.7784, |
| "nll_loss": 0.11523783206939697, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.2497044801712036, |
| "rewards/margins": 0.34274110198020935, |
| "rewards/rejected": -0.5924455523490906, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.2739665883695915, |
| "grad_norm": 2.1834819316864014, |
| "learning_rate": 7.260493827160494e-06, |
| "logits/chosen": 2.21921968460083, |
| "logits/rejected": 2.247816801071167, |
| "logps/chosen": -0.8979324102401733, |
| "logps/rejected": -2.4821817874908447, |
| "loss": 0.753, |
| "nll_loss": 0.09049418568611145, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.26937970519065857, |
| "rewards/margins": 0.47527486085891724, |
| "rewards/rejected": -0.7446545362472534, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.2748446864092376, |
| "grad_norm": 2.105536937713623, |
| "learning_rate": 7.2481481481481485e-06, |
| "logits/chosen": 2.3172378540039062, |
| "logits/rejected": 2.3811116218566895, |
| "logps/chosen": -0.33308374881744385, |
| "logps/rejected": -1.8823559284210205, |
| "loss": 0.6691, |
| "nll_loss": 0.04073493555188179, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.09992513060569763, |
| "rewards/margins": 0.4647817015647888, |
| "rewards/rejected": -0.5647068023681641, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.2757227844488837, |
| "grad_norm": 5.6695556640625, |
| "learning_rate": 7.235802469135803e-06, |
| "logits/chosen": 2.484741687774658, |
| "logits/rejected": 2.524019718170166, |
| "logps/chosen": -0.6708775758743286, |
| "logps/rejected": -2.733649730682373, |
| "loss": 0.675, |
| "nll_loss": 0.06934002041816711, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.20126327872276306, |
| "rewards/margins": 0.6188317537307739, |
| "rewards/rejected": -0.8200949430465698, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.2766008824885298, |
| "grad_norm": 0.11841005086898804, |
| "learning_rate": 7.223456790123457e-06, |
| "logits/chosen": 2.3487932682037354, |
| "logits/rejected": 2.4092326164245605, |
| "logps/chosen": -0.5150425434112549, |
| "logps/rejected": -1.9013131856918335, |
| "loss": 0.7062, |
| "nll_loss": 0.05403406545519829, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.15451276302337646, |
| "rewards/margins": 0.4158812463283539, |
| "rewards/rejected": -0.570393979549408, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.277478980528176, |
| "grad_norm": 2.9380528926849365, |
| "learning_rate": 7.211111111111112e-06, |
| "logits/chosen": 2.3656086921691895, |
| "logits/rejected": 2.4282584190368652, |
| "logps/chosen": -0.6937441825866699, |
| "logps/rejected": -2.480012893676758, |
| "loss": 0.701, |
| "nll_loss": 0.07696821540594101, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.20812325179576874, |
| "rewards/margins": 0.5358806848526001, |
| "rewards/rejected": -0.74400395154953, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.2783570785678221, |
| "grad_norm": 5.344911575317383, |
| "learning_rate": 7.198765432098766e-06, |
| "logits/chosen": 2.2785983085632324, |
| "logits/rejected": 2.3243610858917236, |
| "logps/chosen": -0.7472286224365234, |
| "logps/rejected": -1.1668407917022705, |
| "loss": 0.7779, |
| "nll_loss": 0.07855083793401718, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.22416862845420837, |
| "rewards/margins": 0.12588365375995636, |
| "rewards/rejected": -0.35005226731300354, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.2792351766074682, |
| "grad_norm": 0.625324547290802, |
| "learning_rate": 7.18641975308642e-06, |
| "logits/chosen": 2.2220301628112793, |
| "logits/rejected": 2.2504611015319824, |
| "logps/chosen": -0.7789251208305359, |
| "logps/rejected": -2.3314409255981445, |
| "loss": 0.7352, |
| "nll_loss": 0.06989389657974243, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.23367755115032196, |
| "rewards/margins": 0.4657546877861023, |
| "rewards/rejected": -0.6994322538375854, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.28011327464711433, |
| "grad_norm": 2.6372480392456055, |
| "learning_rate": 7.174074074074074e-06, |
| "logits/chosen": 2.337601900100708, |
| "logits/rejected": 2.3855247497558594, |
| "logps/chosen": -0.4849260747432709, |
| "logps/rejected": -2.1613705158233643, |
| "loss": 0.6273, |
| "nll_loss": 0.052817367017269135, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.14547783136367798, |
| "rewards/margins": 0.5029333829879761, |
| "rewards/rejected": -0.6484112739562988, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.2809913726867605, |
| "grad_norm": 2.161203145980835, |
| "learning_rate": 7.1617283950617285e-06, |
| "logits/chosen": 2.1061933040618896, |
| "logits/rejected": 2.1634392738342285, |
| "logps/chosen": -0.5152336955070496, |
| "logps/rejected": -2.2968502044677734, |
| "loss": 0.6311, |
| "nll_loss": 0.04893555864691734, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.15457013249397278, |
| "rewards/margins": 0.5344849824905396, |
| "rewards/rejected": -0.6890550851821899, |
| "step": 3200 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 9000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|