| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 400, | |
| "global_step": 564, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008865248226950355, | |
| "grad_norm": 145.79384079725077, | |
| "learning_rate": 5.087719298245614e-09, | |
| "logps/chosen": -2.8927152156829834, | |
| "logps/rejected": -0.7171114683151245, | |
| "loss": 25.359, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.927154541015625, | |
| "rewards/margins": -21.75603485107422, | |
| "rewards/rejected": -7.171114444732666, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01773049645390071, | |
| "grad_norm": 137.86746181179828, | |
| "learning_rate": 1.0175438596491228e-08, | |
| "logps/chosen": -2.677199602127075, | |
| "logps/rejected": -0.7770185470581055, | |
| "loss": 24.5807, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.77199363708496, | |
| "rewards/margins": -19.00181007385254, | |
| "rewards/rejected": -7.7701849937438965, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.026595744680851064, | |
| "grad_norm": 152.88578139071717, | |
| "learning_rate": 1.5263157894736843e-08, | |
| "logps/chosen": -2.4912009239196777, | |
| "logps/rejected": -0.7790744304656982, | |
| "loss": 24.377, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -24.912012100219727, | |
| "rewards/margins": -17.121267318725586, | |
| "rewards/rejected": -7.790743350982666, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03546099290780142, | |
| "grad_norm": 167.82396462459522, | |
| "learning_rate": 2.0350877192982456e-08, | |
| "logps/chosen": -2.8186190128326416, | |
| "logps/rejected": -0.8183754086494446, | |
| "loss": 24.5781, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.18619155883789, | |
| "rewards/margins": -20.002437591552734, | |
| "rewards/rejected": -8.183753967285156, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.044326241134751775, | |
| "grad_norm": 161.63607857249784, | |
| "learning_rate": 2.543859649122807e-08, | |
| "logps/chosen": -3.061793804168701, | |
| "logps/rejected": -0.6853266954421997, | |
| "loss": 24.7468, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -30.61794090270996, | |
| "rewards/margins": -23.764671325683594, | |
| "rewards/rejected": -6.853266716003418, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05319148936170213, | |
| "grad_norm": 146.78761234739966, | |
| "learning_rate": 3.0526315789473686e-08, | |
| "logps/chosen": -2.6459131240844727, | |
| "logps/rejected": -0.7572848200798035, | |
| "loss": 24.7645, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.459131240844727, | |
| "rewards/margins": -18.886281967163086, | |
| "rewards/rejected": -7.572848320007324, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06205673758865248, | |
| "grad_norm": 155.4267473642588, | |
| "learning_rate": 3.56140350877193e-08, | |
| "logps/chosen": -2.725562572479248, | |
| "logps/rejected": -0.7676559090614319, | |
| "loss": 24.0294, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.255626678466797, | |
| "rewards/margins": -19.57906723022461, | |
| "rewards/rejected": -7.676558494567871, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.07092198581560284, | |
| "grad_norm": 144.6233545852746, | |
| "learning_rate": 4.070175438596491e-08, | |
| "logps/chosen": -2.8725643157958984, | |
| "logps/rejected": -0.765281081199646, | |
| "loss": 24.9378, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.725643157958984, | |
| "rewards/margins": -21.072834014892578, | |
| "rewards/rejected": -7.652810573577881, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0797872340425532, | |
| "grad_norm": 136.54468141271204, | |
| "learning_rate": 4.578947368421053e-08, | |
| "logps/chosen": -2.6484568119049072, | |
| "logps/rejected": -0.7622822523117065, | |
| "loss": 24.9312, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.48456382751465, | |
| "rewards/margins": -18.861743927001953, | |
| "rewards/rejected": -7.6228227615356445, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08865248226950355, | |
| "grad_norm": 137.79908153328165, | |
| "learning_rate": 5.087719298245614e-08, | |
| "logps/chosen": -2.7798712253570557, | |
| "logps/rejected": -0.7086225748062134, | |
| "loss": 23.7831, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.7987117767334, | |
| "rewards/margins": -20.712488174438477, | |
| "rewards/rejected": -7.086225986480713, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0975177304964539, | |
| "grad_norm": 144.2107107633718, | |
| "learning_rate": 5.596491228070176e-08, | |
| "logps/chosen": -2.7623817920684814, | |
| "logps/rejected": -0.766534686088562, | |
| "loss": 24.5744, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.62381935119629, | |
| "rewards/margins": -19.95846939086914, | |
| "rewards/rejected": -7.665347099304199, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.10638297872340426, | |
| "grad_norm": 154.55501465614182, | |
| "learning_rate": 5.799498949116746e-08, | |
| "logps/chosen": -2.6049671173095703, | |
| "logps/rejected": -0.789307177066803, | |
| "loss": 24.1007, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.049671173095703, | |
| "rewards/margins": -18.156597137451172, | |
| "rewards/rejected": -7.893072605133057, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11524822695035461, | |
| "grad_norm": 143.91644801228858, | |
| "learning_rate": 5.796437598480915e-08, | |
| "logps/chosen": -2.6897542476654053, | |
| "logps/rejected": -0.7617594599723816, | |
| "loss": 24.5552, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.89754295349121, | |
| "rewards/margins": -19.279949188232422, | |
| "rewards/rejected": -7.617594242095947, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.12411347517730496, | |
| "grad_norm": 140.27765213480782, | |
| "learning_rate": 5.7905961935892097e-08, | |
| "logps/chosen": -2.884532928466797, | |
| "logps/rejected": -0.7829927802085876, | |
| "loss": 25.3716, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.845327377319336, | |
| "rewards/margins": -21.01540184020996, | |
| "rewards/rejected": -7.829927921295166, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.13297872340425532, | |
| "grad_norm": 134.95151563701398, | |
| "learning_rate": 5.781980341129838e-08, | |
| "logps/chosen": -2.551480531692505, | |
| "logps/rejected": -0.7440924644470215, | |
| "loss": 24.9724, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.51480484008789, | |
| "rewards/margins": -18.073881149291992, | |
| "rewards/rejected": -7.440924167633057, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.14184397163120568, | |
| "grad_norm": 148.52723967100297, | |
| "learning_rate": 5.770598310756983e-08, | |
| "logps/chosen": -2.5896859169006348, | |
| "logps/rejected": -0.7434027791023254, | |
| "loss": 24.4619, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.896862030029297, | |
| "rewards/margins": -18.462833404541016, | |
| "rewards/rejected": -7.434027194976807, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.15070921985815602, | |
| "grad_norm": 129.82509761088346, | |
| "learning_rate": 5.7564610271534306e-08, | |
| "logps/chosen": -2.7586374282836914, | |
| "logps/rejected": -0.7194596529006958, | |
| "loss": 24.4028, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.586376190185547, | |
| "rewards/margins": -20.39177894592285, | |
| "rewards/rejected": -7.194596290588379, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.1595744680851064, | |
| "grad_norm": 133.51503125792178, | |
| "learning_rate": 5.7395820595448646e-08, | |
| "logps/chosen": -2.647547483444214, | |
| "logps/rejected": -0.8552893400192261, | |
| "loss": 23.7569, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.475473403930664, | |
| "rewards/margins": -17.92258071899414, | |
| "rewards/rejected": -8.552892684936523, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.16843971631205673, | |
| "grad_norm": 146.6456426617639, | |
| "learning_rate": 5.719977608675869e-08, | |
| "logps/chosen": -2.802583694458008, | |
| "logps/rejected": -0.8267061114311218, | |
| "loss": 24.7453, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.025836944580078, | |
| "rewards/margins": -19.758777618408203, | |
| "rewards/rejected": -8.267061233520508, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.1773049645390071, | |
| "grad_norm": 141.21078965475047, | |
| "learning_rate": 5.697666491260153e-08, | |
| "logps/chosen": -2.8149936199188232, | |
| "logps/rejected": -0.7728549242019653, | |
| "loss": 24.3411, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.149938583374023, | |
| "rewards/margins": -20.42138671875, | |
| "rewards/rejected": -7.728548526763916, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.18617021276595744, | |
| "grad_norm": 139.6431133606685, | |
| "learning_rate": 5.6726701219199265e-08, | |
| "logps/chosen": -2.575775623321533, | |
| "logps/rejected": -0.8046213388442993, | |
| "loss": 24.0486, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.757755279541016, | |
| "rewards/margins": -17.71154022216797, | |
| "rewards/rejected": -8.046213150024414, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.1950354609929078, | |
| "grad_norm": 142.62971890333534, | |
| "learning_rate": 5.6450124926317493e-08, | |
| "logps/chosen": -2.6268563270568848, | |
| "logps/rejected": -0.7895797491073608, | |
| "loss": 23.2861, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.268564224243164, | |
| "rewards/margins": -18.372766494750977, | |
| "rewards/rejected": -7.8957977294921875, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.20390070921985815, | |
| "grad_norm": 135.53231582386616, | |
| "learning_rate": 5.614720149698586e-08, | |
| "logps/chosen": -2.7986276149749756, | |
| "logps/rejected": -0.7632648944854736, | |
| "loss": 24.5058, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.986278533935547, | |
| "rewards/margins": -20.35363006591797, | |
| "rewards/rejected": -7.6326494216918945, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 138.26827689206286, | |
| "learning_rate": 5.581822168270177e-08, | |
| "logps/chosen": -2.7848868370056152, | |
| "logps/rejected": -0.7665807604789734, | |
| "loss": 24.5205, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.8488712310791, | |
| "rewards/margins": -20.183063507080078, | |
| "rewards/rejected": -7.665807247161865, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.22163120567375885, | |
| "grad_norm": 139.86127105009692, | |
| "learning_rate": 5.546350124436175e-08, | |
| "logps/chosen": -2.8134217262268066, | |
| "logps/rejected": -0.8269641995429993, | |
| "loss": 23.8557, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.134220123291016, | |
| "rewards/margins": -19.864574432373047, | |
| "rewards/rejected": -8.269640922546387, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.23049645390070922, | |
| "grad_norm": 129.00203142586403, | |
| "learning_rate": 5.508338064918828e-08, | |
| "logps/chosen": -2.7570581436157227, | |
| "logps/rejected": -0.7763508558273315, | |
| "loss": 24.5182, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.570581436157227, | |
| "rewards/margins": -19.807071685791016, | |
| "rewards/rejected": -7.7635087966918945, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2393617021276596, | |
| "grad_norm": 136.75890975025638, | |
| "learning_rate": 5.467822474394309e-08, | |
| "logps/chosen": -2.733191967010498, | |
| "logps/rejected": -0.7583021521568298, | |
| "loss": 23.358, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.331918716430664, | |
| "rewards/margins": -19.748897552490234, | |
| "rewards/rejected": -7.583021640777588, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.24822695035460993, | |
| "grad_norm": 128.45808116183338, | |
| "learning_rate": 5.42484224047405e-08, | |
| "logps/chosen": -2.721412420272827, | |
| "logps/rejected": -0.8087137341499329, | |
| "loss": 23.6178, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.214126586914062, | |
| "rewards/margins": -19.12698745727539, | |
| "rewards/rejected": -8.087137222290039, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2570921985815603, | |
| "grad_norm": 206.26448986564472, | |
| "learning_rate": 5.379438616379695e-08, | |
| "logps/chosen": -2.7158586978912354, | |
| "logps/rejected": -0.8855924606323242, | |
| "loss": 23.955, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.158584594726562, | |
| "rewards/margins": -18.30265998840332, | |
| "rewards/rejected": -8.855923652648926, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.26595744680851063, | |
| "grad_norm": 134.17638099270795, | |
| "learning_rate": 5.331655181347497e-08, | |
| "logps/chosen": -2.5544021129608154, | |
| "logps/rejected": -0.885018527507782, | |
| "loss": 22.7027, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.544025421142578, | |
| "rewards/margins": -16.693838119506836, | |
| "rewards/rejected": -8.850184440612793, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.274822695035461, | |
| "grad_norm": 129.6289310031373, | |
| "learning_rate": 5.281537798800162e-08, | |
| "logps/chosen": -2.8060669898986816, | |
| "logps/rejected": -0.8253576159477234, | |
| "loss": 23.4732, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.0606689453125, | |
| "rewards/margins": -19.807092666625977, | |
| "rewards/rejected": -8.253576278686523, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.28368794326241137, | |
| "grad_norm": 129.83140430467532, | |
| "learning_rate": 5.2291345723262914e-08, | |
| "logps/chosen": -2.6562483310699463, | |
| "logps/rejected": -0.8298206329345703, | |
| "loss": 23.2456, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.562480926513672, | |
| "rewards/margins": -18.2642765045166, | |
| "rewards/rejected": -8.29820728302002, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2925531914893617, | |
| "grad_norm": 128.32793130409877, | |
| "learning_rate": 5.174495799509666e-08, | |
| "logps/chosen": -2.7844228744506836, | |
| "logps/rejected": -0.8672133684158325, | |
| "loss": 23.6542, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.844226837158203, | |
| "rewards/margins": -19.17209815979004, | |
| "rewards/rejected": -8.67213249206543, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.30141843971631205, | |
| "grad_norm": 130.4366879580412, | |
| "learning_rate": 5.1176739236527024e-08, | |
| "logps/chosen": -2.6014797687530518, | |
| "logps/rejected": -0.7989243268966675, | |
| "loss": 23.797, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.01479721069336, | |
| "rewards/margins": -18.025556564331055, | |
| "rewards/rejected": -7.9892425537109375, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.3102836879432624, | |
| "grad_norm": 125.04571943056439, | |
| "learning_rate": 5.058723483440399e-08, | |
| "logps/chosen": -2.8481719493865967, | |
| "logps/rejected": -0.915848433971405, | |
| "loss": 24.5149, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.48171615600586, | |
| "rewards/margins": -19.323230743408203, | |
| "rewards/rejected": -9.15848445892334, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.3191489361702128, | |
| "grad_norm": 130.27428829850032, | |
| "learning_rate": 4.997701060593102e-08, | |
| "logps/chosen": -2.6424190998077393, | |
| "logps/rejected": -0.8689352869987488, | |
| "loss": 23.739, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.424190521240234, | |
| "rewards/margins": -17.73483657836914, | |
| "rewards/rejected": -8.689352035522461, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3280141843971631, | |
| "grad_norm": 124.33489480845863, | |
| "learning_rate": 4.934665225558327e-08, | |
| "logps/chosen": -2.634460926055908, | |
| "logps/rejected": -0.8169125318527222, | |
| "loss": 22.9736, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.3446102142334, | |
| "rewards/margins": -18.175479888916016, | |
| "rewards/rejected": -8.1691255569458, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.33687943262411346, | |
| "grad_norm": 124.15263735720329, | |
| "learning_rate": 4.869676481293759e-08, | |
| "logps/chosen": -2.4845986366271973, | |
| "logps/rejected": -0.9172335863113403, | |
| "loss": 22.6986, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -24.845985412597656, | |
| "rewards/margins": -15.6736478805542, | |
| "rewards/rejected": -9.172337532043457, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.34574468085106386, | |
| "grad_norm": 125.80201305563536, | |
| "learning_rate": 4.8027972051954006e-08, | |
| "logps/chosen": -2.5603835582733154, | |
| "logps/rejected": -0.8787148594856262, | |
| "loss": 23.5192, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.603836059570312, | |
| "rewards/margins": -16.81669044494629, | |
| "rewards/rejected": -8.787147521972656, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.3546099290780142, | |
| "grad_norm": 123.74788555507502, | |
| "learning_rate": 4.734091589226594e-08, | |
| "logps/chosen": -2.6561684608459473, | |
| "logps/rejected": -0.8788897395133972, | |
| "loss": 22.7857, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.561681747436523, | |
| "rewards/margins": -17.772785186767578, | |
| "rewards/rejected": -8.788897514343262, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.36347517730496454, | |
| "grad_norm": 128.63883007903138, | |
| "learning_rate": 4.663625578305394e-08, | |
| "logps/chosen": -2.8956170082092285, | |
| "logps/rejected": -0.961021900177002, | |
| "loss": 22.1626, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.9561710357666, | |
| "rewards/margins": -19.345951080322266, | |
| "rewards/rejected": -9.61021900177002, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.3723404255319149, | |
| "grad_norm": 122.637160225811, | |
| "learning_rate": 4.591466807009411e-08, | |
| "logps/chosen": -2.7748825550079346, | |
| "logps/rejected": -0.8324272036552429, | |
| "loss": 24.01, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.748821258544922, | |
| "rewards/margins": -19.42455291748047, | |
| "rewards/rejected": -8.324272155761719, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.38120567375886527, | |
| "grad_norm": 125.95797661768107, | |
| "learning_rate": 4.517684534658905e-08, | |
| "logps/chosen": -2.762359619140625, | |
| "logps/rejected": -0.899163544178009, | |
| "loss": 23.4109, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.623592376708984, | |
| "rewards/margins": -18.6319580078125, | |
| "rewards/rejected": -8.991636276245117, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3900709219858156, | |
| "grad_norm": 123.62675866923496, | |
| "learning_rate": 4.4423495788404036e-08, | |
| "logps/chosen": -2.753671407699585, | |
| "logps/rejected": -0.9254837036132812, | |
| "loss": 22.555, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.536712646484375, | |
| "rewards/margins": -18.281875610351562, | |
| "rewards/rejected": -9.254836082458496, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.39893617021276595, | |
| "grad_norm": 139.3227676061545, | |
| "learning_rate": 4.365534247434681e-08, | |
| "logps/chosen": -2.626866102218628, | |
| "logps/rejected": -0.8860219717025757, | |
| "loss": 22.7427, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.268661499023438, | |
| "rewards/margins": -17.408443450927734, | |
| "rewards/rejected": -8.860219955444336, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4078014184397163, | |
| "grad_norm": 126.61555387712745, | |
| "learning_rate": 4.2873122692143174e-08, | |
| "logps/chosen": -2.62453031539917, | |
| "logps/rejected": -0.9488039016723633, | |
| "loss": 22.6716, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.245304107666016, | |
| "rewards/margins": -16.757265090942383, | |
| "rewards/rejected": -9.488039016723633, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 134.78909968442065, | |
| "learning_rate": 4.207758723077463e-08, | |
| "logps/chosen": -2.7081642150878906, | |
| "logps/rejected": -0.9721806645393372, | |
| "loss": 22.6942, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.08164405822754, | |
| "rewards/margins": -17.35983657836914, | |
| "rewards/rejected": -9.721807479858398, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 157.23185366158737, | |
| "learning_rate": 4.126949965985725e-08, | |
| "logps/chosen": -2.8437817096710205, | |
| "logps/rejected": -0.9527214169502258, | |
| "loss": 22.2947, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.437816619873047, | |
| "rewards/margins": -18.91060447692871, | |
| "rewards/rejected": -9.527214050292969, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.43439716312056736, | |
| "grad_norm": 129.02608007179418, | |
| "learning_rate": 4.0449635596753506e-08, | |
| "logps/chosen": -2.636435031890869, | |
| "logps/rejected": -0.9736749529838562, | |
| "loss": 23.4831, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.36434555053711, | |
| "rewards/margins": -16.62759780883789, | |
| "rewards/rejected": -9.736749649047852, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.4432624113475177, | |
| "grad_norm": 135.37504967455945, | |
| "learning_rate": 3.961878196212035e-08, | |
| "logps/chosen": -2.8127787113189697, | |
| "logps/rejected": -1.0067663192749023, | |
| "loss": 22.2267, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.127782821655273, | |
| "rewards/margins": -18.060121536254883, | |
| "rewards/rejected": -10.067663192749023, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4521276595744681, | |
| "grad_norm": 132.62121945542594, | |
| "learning_rate": 3.877773622460831e-08, | |
| "logps/chosen": -2.6574721336364746, | |
| "logps/rejected": -0.9831992983818054, | |
| "loss": 22.069, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.574718475341797, | |
| "rewards/margins": -16.742725372314453, | |
| "rewards/rejected": -9.831993103027344, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.46099290780141844, | |
| "grad_norm": 125.06632012924307, | |
| "learning_rate": 3.7927305635436316e-08, | |
| "logps/chosen": -2.679917812347412, | |
| "logps/rejected": -0.9020110964775085, | |
| "loss": 22.2124, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.799179077148438, | |
| "rewards/margins": -17.779069900512695, | |
| "rewards/rejected": -9.020112991333008, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4698581560283688, | |
| "grad_norm": 155.40401006121775, | |
| "learning_rate": 3.7068306453577133e-08, | |
| "logps/chosen": -2.674633741378784, | |
| "logps/rejected": -1.052839756011963, | |
| "loss": 22.077, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.746337890625, | |
| "rewards/margins": -16.217939376831055, | |
| "rewards/rejected": -10.528398513793945, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.4787234042553192, | |
| "grad_norm": 126.39619832153441, | |
| "learning_rate": 3.6201563162296865e-08, | |
| "logps/chosen": -2.6174607276916504, | |
| "logps/rejected": -0.9958217740058899, | |
| "loss": 21.4589, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.174606323242188, | |
| "rewards/margins": -16.216388702392578, | |
| "rewards/rejected": -9.958218574523926, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4875886524822695, | |
| "grad_norm": 132.9454118011964, | |
| "learning_rate": 3.5327907677800764e-08, | |
| "logps/chosen": -2.5700345039367676, | |
| "logps/rejected": -0.9127339124679565, | |
| "loss": 21.7442, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.700342178344727, | |
| "rewards/margins": -16.57300567626953, | |
| "rewards/rejected": -9.127340316772461, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.49645390070921985, | |
| "grad_norm": 138.9863606047641, | |
| "learning_rate": 3.444817855074469e-08, | |
| "logps/chosen": -2.705763339996338, | |
| "logps/rejected": -0.9670157432556152, | |
| "loss": 22.0458, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.057632446289062, | |
| "rewards/margins": -17.38747787475586, | |
| "rewards/rejected": -9.670157432556152, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.5053191489361702, | |
| "grad_norm": 147.23847848265115, | |
| "learning_rate": 3.35632201613787e-08, | |
| "logps/chosen": -2.5446105003356934, | |
| "logps/rejected": -1.0588749647140503, | |
| "loss": 21.5976, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.44610595703125, | |
| "rewards/margins": -14.857358932495117, | |
| "rewards/rejected": -10.588749885559082, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.5141843971631206, | |
| "grad_norm": 141.20124740919812, | |
| "learning_rate": 3.267388190909531e-08, | |
| "logps/chosen": -2.746526002883911, | |
| "logps/rejected": -1.0211728811264038, | |
| "loss": 21.7845, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.465261459350586, | |
| "rewards/margins": -17.253530502319336, | |
| "rewards/rejected": -10.211729049682617, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.5230496453900709, | |
| "grad_norm": 135.32583172815455, | |
| "learning_rate": 3.1781017397160316e-08, | |
| "logps/chosen": -2.6587398052215576, | |
| "logps/rejected": -1.0348222255706787, | |
| "loss": 21.747, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.587398529052734, | |
| "rewards/margins": -16.239177703857422, | |
| "rewards/rejected": -10.348223686218262, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.5319148936170213, | |
| "grad_norm": 139.77157876582095, | |
| "learning_rate": 3.0885483613408555e-08, | |
| "logps/chosen": -2.373941421508789, | |
| "logps/rejected": -0.9718824625015259, | |
| "loss": 20.4512, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -23.739416122436523, | |
| "rewards/margins": -14.020593643188477, | |
| "rewards/rejected": -9.718823432922363, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5407801418439716, | |
| "grad_norm": 147.51378362316794, | |
| "learning_rate": 2.998814010769123e-08, | |
| "logps/chosen": -2.698490619659424, | |
| "logps/rejected": -1.0464816093444824, | |
| "loss": 20.7516, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.984905242919922, | |
| "rewards/margins": -16.520090103149414, | |
| "rewards/rejected": -10.464816093444824, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.549645390070922, | |
| "grad_norm": 142.23462700614618, | |
| "learning_rate": 2.9089848166864093e-08, | |
| "logps/chosen": -2.676140546798706, | |
| "logps/rejected": -1.064896583557129, | |
| "loss": 20.8999, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -26.761404037475586, | |
| "rewards/margins": -16.112438201904297, | |
| "rewards/rejected": -10.648966789245605, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5585106382978723, | |
| "grad_norm": 139.0705721087497, | |
| "learning_rate": 2.8191469988108394e-08, | |
| "logps/chosen": -2.5131092071533203, | |
| "logps/rejected": -1.1397688388824463, | |
| "loss": 20.1183, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.131092071533203, | |
| "rewards/margins": -13.733403205871582, | |
| "rewards/rejected": -11.397688865661621, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "grad_norm": 141.8773798172117, | |
| "learning_rate": 2.729386785137818e-08, | |
| "logps/chosen": -2.4425089359283447, | |
| "logps/rejected": -1.07571280002594, | |
| "loss": 21.6343, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -24.425086975097656, | |
| "rewards/margins": -13.667961120605469, | |
| "rewards/rejected": -10.75712776184082, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5762411347517731, | |
| "grad_norm": 147.69861320272494, | |
| "learning_rate": 2.6397903291767978e-08, | |
| "logps/chosen": -2.7355971336364746, | |
| "logps/rejected": -1.1018916368484497, | |
| "loss": 20.496, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.355968475341797, | |
| "rewards/margins": -16.337055206298828, | |
| "rewards/rejected": -11.018915176391602, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5851063829787234, | |
| "grad_norm": 149.1225672419858, | |
| "learning_rate": 2.5504436272595635e-08, | |
| "logps/chosen": -2.5969009399414062, | |
| "logps/rejected": -1.0739166736602783, | |
| "loss": 20.72, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.969013214111328, | |
| "rewards/margins": -15.22984504699707, | |
| "rewards/rejected": -10.739164352416992, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5939716312056738, | |
| "grad_norm": 157.4691254355891, | |
| "learning_rate": 2.4614324359993557e-08, | |
| "logps/chosen": -2.7349042892456055, | |
| "logps/rejected": -1.0884466171264648, | |
| "loss": 21.054, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.349040985107422, | |
| "rewards/margins": -16.464576721191406, | |
| "rewards/rejected": -10.884466171264648, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.6028368794326241, | |
| "grad_norm": 146.0536422019217, | |
| "learning_rate": 2.372842189980099e-08, | |
| "logps/chosen": -2.7785160541534424, | |
| "logps/rejected": -1.1260712146759033, | |
| "loss": 20.4037, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.785160064697266, | |
| "rewards/margins": -16.524446487426758, | |
| "rewards/rejected": -11.260711669921875, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.6117021276595744, | |
| "grad_norm": 149.39663092505745, | |
| "learning_rate": 2.284757919754703e-08, | |
| "logps/chosen": -2.7923343181610107, | |
| "logps/rejected": -1.0744545459747314, | |
| "loss": 20.8174, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.92333984375, | |
| "rewards/margins": -17.17879867553711, | |
| "rewards/rejected": -10.74454402923584, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6205673758865248, | |
| "grad_norm": 147.46507158458496, | |
| "learning_rate": 2.1972641702311782e-08, | |
| "logps/chosen": -2.5333476066589355, | |
| "logps/rejected": -1.0287792682647705, | |
| "loss": 21.4482, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.33347511291504, | |
| "rewards/margins": -15.045679092407227, | |
| "rewards/rejected": -10.28779411315918, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6294326241134752, | |
| "grad_norm": 147.01924179933897, | |
| "learning_rate": 2.1104449195248647e-08, | |
| "logps/chosen": -2.6328065395355225, | |
| "logps/rejected": -1.1154229640960693, | |
| "loss": 20.6301, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.328067779541016, | |
| "rewards/margins": -15.173837661743164, | |
| "rewards/rejected": -11.154230117797852, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 148.7416966559369, | |
| "learning_rate": 2.024383498354694e-08, | |
| "logps/chosen": -2.7731688022613525, | |
| "logps/rejected": -1.0988487005233765, | |
| "loss": 20.7721, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.731685638427734, | |
| "rewards/margins": -16.743200302124023, | |
| "rewards/rejected": -10.98848819732666, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.6471631205673759, | |
| "grad_norm": 150.67235901875148, | |
| "learning_rate": 1.939162510060823e-08, | |
| "logps/chosen": -2.8284106254577637, | |
| "logps/rejected": -1.1401846408843994, | |
| "loss": 20.7806, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.284109115600586, | |
| "rewards/margins": -16.882261276245117, | |
| "rewards/rejected": -11.401845932006836, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6560283687943262, | |
| "grad_norm": 155.38197341493145, | |
| "learning_rate": 1.8548637513204236e-08, | |
| "logps/chosen": -2.6566267013549805, | |
| "logps/rejected": -1.141741156578064, | |
| "loss": 20.6873, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.566265106201172, | |
| "rewards/margins": -15.14885425567627, | |
| "rewards/rejected": -11.417410850524902, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6648936170212766, | |
| "grad_norm": 157.1118672693283, | |
| "learning_rate": 1.7715681336377283e-08, | |
| "logps/chosen": -2.611238479614258, | |
| "logps/rejected": -1.178731918334961, | |
| "loss": 20.0012, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.112384796142578, | |
| "rewards/margins": -14.325067520141602, | |
| "rewards/rejected": -11.787318229675293, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.6737588652482269, | |
| "grad_norm": 159.0746228987349, | |
| "learning_rate": 1.689355605683665e-08, | |
| "logps/chosen": -2.7305948734283447, | |
| "logps/rejected": -1.2593529224395752, | |
| "loss": 20.4842, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.30594825744629, | |
| "rewards/margins": -14.712419509887695, | |
| "rewards/rejected": -12.593527793884277, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6826241134751773, | |
| "grad_norm": 160.39564957687494, | |
| "learning_rate": 1.6083050765596617e-08, | |
| "logps/chosen": -2.788814067840576, | |
| "logps/rejected": -1.1364014148712158, | |
| "loss": 20.5859, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.88814353942871, | |
| "rewards/margins": -16.52412986755371, | |
| "rewards/rejected": -11.364013671875, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6914893617021277, | |
| "grad_norm": 164.34029604866143, | |
| "learning_rate": 1.5284943400592178e-08, | |
| "logps/chosen": -2.724658489227295, | |
| "logps/rejected": -1.1553473472595215, | |
| "loss": 20.3371, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.24658203125, | |
| "rewards/margins": -15.693109512329102, | |
| "rewards/rejected": -11.553472518920898, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.700354609929078, | |
| "grad_norm": 159.44709692393246, | |
| "learning_rate": 1.4500000000000007e-08, | |
| "logps/chosen": -2.8505020141601562, | |
| "logps/rejected": -1.1487619876861572, | |
| "loss": 19.794, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.505020141601562, | |
| "rewards/margins": -17.01739501953125, | |
| "rewards/rejected": -11.48762035369873, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "grad_norm": 158.29298102146703, | |
| "learning_rate": 1.3728973966980781e-08, | |
| "logps/chosen": -2.527055025100708, | |
| "logps/rejected": -1.2128355503082275, | |
| "loss": 20.2621, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.27054786682129, | |
| "rewards/margins": -13.142193794250488, | |
| "rewards/rejected": -12.1283540725708, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "eval_logps/chosen": -2.758201837539673, | |
| "eval_logps/rejected": -1.134634256362915, | |
| "eval_loss": 20.37216567993164, | |
| "eval_rewards/accuracies": 0.0, | |
| "eval_rewards/chosen": -27.582021713256836, | |
| "eval_rewards/margins": -16.23567771911621, | |
| "eval_rewards/rejected": -11.346341133117676, | |
| "eval_runtime": 4.3171, | |
| "eval_samples_per_second": 2.78, | |
| "eval_steps_per_second": 0.695, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7180851063829787, | |
| "grad_norm": 154.90237635782003, | |
| "learning_rate": 1.2972605346548983e-08, | |
| "logps/chosen": -2.6931498050689697, | |
| "logps/rejected": -1.1878819465637207, | |
| "loss": 20.3974, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.93149757385254, | |
| "rewards/margins": -15.052679061889648, | |
| "rewards/rejected": -11.878819465637207, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7269503546099291, | |
| "grad_norm": 160.02874826439538, | |
| "learning_rate": 1.2231620115263997e-08, | |
| "logps/chosen": -2.548884630203247, | |
| "logps/rejected": -1.1739720106124878, | |
| "loss": 18.9467, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.488845825195312, | |
| "rewards/margins": -13.749125480651855, | |
| "rewards/rejected": -11.739721298217773, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.7358156028368794, | |
| "grad_norm": 180.13350727054066, | |
| "learning_rate": 1.1506729484424434e-08, | |
| "logps/chosen": -2.5885915756225586, | |
| "logps/rejected": -1.1826770305633545, | |
| "loss": 19.4856, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.885915756225586, | |
| "rewards/margins": -14.0591459274292, | |
| "rewards/rejected": -11.826769828796387, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.7446808510638298, | |
| "grad_norm": 164.62206406128104, | |
| "learning_rate": 1.0798629217434427e-08, | |
| "logps/chosen": -2.724733591079712, | |
| "logps/rejected": -1.2293965816497803, | |
| "loss": 19.5944, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.24733543395996, | |
| "rewards/margins": -14.953369140625, | |
| "rewards/rejected": -12.293964385986328, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.7535460992907801, | |
| "grad_norm": 158.88040216218286, | |
| "learning_rate": 1.0107998961996992e-08, | |
| "logps/chosen": -2.7371504306793213, | |
| "logps/rejected": -1.2214720249176025, | |
| "loss": 20.0188, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.371501922607422, | |
| "rewards/margins": -15.156784057617188, | |
| "rewards/rejected": -12.214719772338867, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7624113475177305, | |
| "grad_norm": 166.98528739652193, | |
| "learning_rate": 9.435501597775744e-09, | |
| "logps/chosen": -2.7026240825653076, | |
| "logps/rejected": -1.18560791015625, | |
| "loss": 20.6567, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.026241302490234, | |
| "rewards/margins": -15.17016315460205, | |
| "rewards/rejected": -11.856078147888184, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7712765957446809, | |
| "grad_norm": 166.73536839937955, | |
| "learning_rate": 8.781782600150627e-09, | |
| "logps/chosen": -2.6552720069885254, | |
| "logps/rejected": -1.1895506381988525, | |
| "loss": 19.3649, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.552722930908203, | |
| "rewards/margins": -14.65721321105957, | |
| "rewards/rejected": -11.895506858825684, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7801418439716312, | |
| "grad_norm": 179.8479470719166, | |
| "learning_rate": 8.147469420678878e-09, | |
| "logps/chosen": -2.5452492237091064, | |
| "logps/rejected": -1.2747502326965332, | |
| "loss": 19.5778, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.452489852905273, | |
| "rewards/margins": -12.704987525939941, | |
| "rewards/rejected": -12.747503280639648, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7890070921985816, | |
| "grad_norm": 163.6317107864734, | |
| "learning_rate": 7.53317088485529e-09, | |
| "logps/chosen": -2.723301410675049, | |
| "logps/rejected": -1.1577608585357666, | |
| "loss": 20.0476, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.233013153076172, | |
| "rewards/margins": -15.655405044555664, | |
| "rewards/rejected": -11.577608108520508, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7978723404255319, | |
| "grad_norm": 167.4373665727582, | |
| "learning_rate": 6.939476607750421e-09, | |
| "logps/chosen": -2.6121654510498047, | |
| "logps/rejected": -1.3264158964157104, | |
| "loss": 19.4318, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.121652603149414, | |
| "rewards/margins": -12.85749340057373, | |
| "rewards/rejected": -13.26416015625, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8067375886524822, | |
| "grad_norm": 162.53694283390337, | |
| "learning_rate": 6.366956428087029e-09, | |
| "logps/chosen": -2.6545283794403076, | |
| "logps/rejected": -1.1533528566360474, | |
| "loss": 19.4941, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.5452823638916, | |
| "rewards/margins": -15.011754035949707, | |
| "rewards/rejected": -11.533529281616211, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8156028368794326, | |
| "grad_norm": 153.7988146863778, | |
| "learning_rate": 5.816159861298473e-09, | |
| "logps/chosen": -2.5948379039764404, | |
| "logps/rejected": -1.242906093597412, | |
| "loss": 19.0391, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.948379516601562, | |
| "rewards/margins": -13.519317626953125, | |
| "rewards/rejected": -12.429059982299805, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.824468085106383, | |
| "grad_norm": 175.82612782050703, | |
| "learning_rate": 5.287615572093552e-09, | |
| "logps/chosen": -2.5693373680114746, | |
| "logps/rejected": -1.265942931175232, | |
| "loss": 19.1887, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.693370819091797, | |
| "rewards/margins": -13.033945083618164, | |
| "rewards/rejected": -12.659429550170898, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 176.4401422786228, | |
| "learning_rate": 4.781830867034439e-09, | |
| "logps/chosen": -2.820774555206299, | |
| "logps/rejected": -1.2720921039581299, | |
| "loss": 19.2298, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.207748413085938, | |
| "rewards/margins": -15.486825942993164, | |
| "rewards/rejected": -12.720921516418457, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.8421985815602837, | |
| "grad_norm": 181.00370501517915, | |
| "learning_rate": 4.29929120761447e-09, | |
| "logps/chosen": -2.8616652488708496, | |
| "logps/rejected": -1.2115236520767212, | |
| "loss": 20.3312, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.616657257080078, | |
| "rewards/margins": -16.501420974731445, | |
| "rewards/rejected": -12.115234375, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.851063829787234, | |
| "grad_norm": 168.83953072620574, | |
| "learning_rate": 3.840459744303181e-09, | |
| "logps/chosen": -2.5970399379730225, | |
| "logps/rejected": -1.2681810855865479, | |
| "loss": 19.4312, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.970401763916016, | |
| "rewards/margins": -13.28858757019043, | |
| "rewards/rejected": -12.681811332702637, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.8599290780141844, | |
| "grad_norm": 175.0708401385156, | |
| "learning_rate": 3.4057768720060405e-09, | |
| "logps/chosen": -2.7182438373565674, | |
| "logps/rejected": -1.2552506923675537, | |
| "loss": 18.7786, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.18243980407715, | |
| "rewards/margins": -14.629931449890137, | |
| "rewards/rejected": -12.552507400512695, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8687943262411347, | |
| "grad_norm": 169.95868089862816, | |
| "learning_rate": 2.9956598073651933e-09, | |
| "logps/chosen": -2.7487266063690186, | |
| "logps/rejected": -1.2310268878936768, | |
| "loss": 19.2411, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.48726463317871, | |
| "rewards/margins": -15.176997184753418, | |
| "rewards/rejected": -12.310269355773926, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8776595744680851, | |
| "grad_norm": 171.60894438461307, | |
| "learning_rate": 2.610502188307318e-09, | |
| "logps/chosen": -2.6313605308532715, | |
| "logps/rejected": -1.280606746673584, | |
| "loss": 19.4042, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.3136043548584, | |
| "rewards/margins": -13.507535934448242, | |
| "rewards/rejected": -12.806066513061523, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.8865248226950354, | |
| "grad_norm": 173.18018279723242, | |
| "learning_rate": 2.2506736962226007e-09, | |
| "logps/chosen": -2.6380727291107178, | |
| "logps/rejected": -1.190015196800232, | |
| "loss": 19.7095, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.3807315826416, | |
| "rewards/margins": -14.48057746887207, | |
| "rewards/rejected": -11.900152206420898, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8953900709219859, | |
| "grad_norm": 175.53613825680904, | |
| "learning_rate": 1.9165197011377867e-09, | |
| "logps/chosen": -2.7568583488464355, | |
| "logps/rejected": -1.2415708303451538, | |
| "loss": 19.7569, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.568584442138672, | |
| "rewards/margins": -15.152874946594238, | |
| "rewards/rejected": -12.415709495544434, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9042553191489362, | |
| "grad_norm": 162.97444514063847, | |
| "learning_rate": 1.6083609302235894e-09, | |
| "logps/chosen": -2.7213332653045654, | |
| "logps/rejected": -1.1794496774673462, | |
| "loss": 19.573, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.213333129882812, | |
| "rewards/margins": -15.418835639953613, | |
| "rewards/rejected": -11.794496536254883, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9131205673758865, | |
| "grad_norm": 151.42734728228336, | |
| "learning_rate": 1.3264931599548922e-09, | |
| "logps/chosen": -2.677914619445801, | |
| "logps/rejected": -1.2583473920822144, | |
| "loss": 18.8208, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.77914810180664, | |
| "rewards/margins": -14.195671081542969, | |
| "rewards/rejected": -12.583473205566406, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.9219858156028369, | |
| "grad_norm": 171.47791458273645, | |
| "learning_rate": 1.0711869322189803e-09, | |
| "logps/chosen": -2.7254385948181152, | |
| "logps/rejected": -1.2042250633239746, | |
| "loss": 19.4982, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.254383087158203, | |
| "rewards/margins": -15.212132453918457, | |
| "rewards/rejected": -12.04224967956543, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.9308510638297872, | |
| "grad_norm": 179.5072478570033, | |
| "learning_rate": 8.426872946444917e-10, | |
| "logps/chosen": -2.789022207260132, | |
| "logps/rejected": -1.2490054368972778, | |
| "loss": 20.0711, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.89022445678711, | |
| "rewards/margins": -15.400167465209961, | |
| "rewards/rejected": -12.4900541305542, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.9397163120567376, | |
| "grad_norm": 174.6622317955513, | |
| "learning_rate": 6.412135654001361e-10, | |
| "logps/chosen": -2.837013006210327, | |
| "logps/rejected": -1.195846676826477, | |
| "loss": 20.0662, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -28.370128631591797, | |
| "rewards/margins": -16.411663055419922, | |
| "rewards/rejected": -11.958466529846191, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.9485815602836879, | |
| "grad_norm": 186.84842211636166, | |
| "learning_rate": 4.669591226890988e-10, | |
| "logps/chosen": -2.5151777267456055, | |
| "logps/rejected": -1.26199209690094, | |
| "loss": 18.6726, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.151775360107422, | |
| "rewards/margins": -12.531854629516602, | |
| "rewards/rejected": -12.61992073059082, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.9574468085106383, | |
| "grad_norm": 173.7570370872437, | |
| "learning_rate": 3.2009121914106934e-10, | |
| "logps/chosen": -2.64577579498291, | |
| "logps/rejected": -1.2345296144485474, | |
| "loss": 19.1912, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -26.4577579498291, | |
| "rewards/margins": -14.112459182739258, | |
| "rewards/rejected": -12.345295906066895, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.9663120567375887, | |
| "grad_norm": 167.4037504767395, | |
| "learning_rate": 2.0075082128006429e-10, | |
| "logps/chosen": -2.7054450511932373, | |
| "logps/rejected": -1.2753221988677979, | |
| "loss": 20.0097, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -27.0544490814209, | |
| "rewards/margins": -14.301225662231445, | |
| "rewards/rejected": -12.753222465515137, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.975177304964539, | |
| "grad_norm": 165.08220357346846, | |
| "learning_rate": 1.0905247422218467e-10, | |
| "logps/chosen": -2.571406602859497, | |
| "logps/rejected": -1.235921859741211, | |
| "loss": 19.2619, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.714065551757812, | |
| "rewards/margins": -13.35484790802002, | |
| "rewards/rejected": -12.359219551086426, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9840425531914894, | |
| "grad_norm": 164.52820389659772, | |
| "learning_rate": 4.508419173306577e-11, | |
| "logps/chosen": -2.555917739868164, | |
| "logps/rejected": -1.320312738418579, | |
| "loss": 19.2352, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -25.55917739868164, | |
| "rewards/margins": -12.356046676635742, | |
| "rewards/rejected": -13.20312786102295, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.9929078014184397, | |
| "grad_norm": 174.49589763108904, | |
| "learning_rate": 8.907371750672467e-12, | |
| "logps/chosen": -2.5288567543029785, | |
| "logps/rejected": -1.209428071975708, | |
| "loss": 19.212, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -25.2885684967041, | |
| "rewards/margins": -13.194287300109863, | |
| "rewards/rejected": -12.094281196594238, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 564, | |
| "total_flos": 0.0, | |
| "train_loss": 21.83849455109725, | |
| "train_runtime": 4561.3354, | |
| "train_samples_per_second": 1.236, | |
| "train_steps_per_second": 0.124 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 564, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |