| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9984, | |
| "eval_steps": 500, | |
| "global_step": 468, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010666666666666666, | |
| "grad_norm": 15.542097378420866, | |
| "learning_rate": 1.0638297872340425e-07, | |
| "logits/chosen": -0.9908615350723267, | |
| "logits/rejected": -1.0208933353424072, | |
| "logps/chosen": -0.2650989294052124, | |
| "logps/rejected": -0.2679658532142639, | |
| "loss": 3.0616, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -2.650989294052124, | |
| "rewards/margins": 0.028668876737356186, | |
| "rewards/rejected": -2.6796584129333496, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.021333333333333333, | |
| "grad_norm": 15.236258225562361, | |
| "learning_rate": 2.127659574468085e-07, | |
| "logits/chosen": -0.9872478246688843, | |
| "logits/rejected": -1.0333178043365479, | |
| "logps/chosen": -0.2723778486251831, | |
| "logps/rejected": -0.2769668400287628, | |
| "loss": 3.0091, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -2.723778486251831, | |
| "rewards/margins": 0.0458899661898613, | |
| "rewards/rejected": -2.7696685791015625, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 16.04597079407282, | |
| "learning_rate": 3.1914893617021275e-07, | |
| "logits/chosen": -1.054266333580017, | |
| "logits/rejected": -1.071502685546875, | |
| "logps/chosen": -0.27361491322517395, | |
| "logps/rejected": -0.2760830521583557, | |
| "loss": 3.0376, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -2.736149311065674, | |
| "rewards/margins": 0.02468101680278778, | |
| "rewards/rejected": -2.7608304023742676, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.042666666666666665, | |
| "grad_norm": 17.377673929521983, | |
| "learning_rate": 4.25531914893617e-07, | |
| "logits/chosen": -1.0661036968231201, | |
| "logits/rejected": -1.10869300365448, | |
| "logps/chosen": -0.2736705541610718, | |
| "logps/rejected": -0.27885910868644714, | |
| "loss": 3.0339, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -2.7367053031921387, | |
| "rewards/margins": 0.051885683089494705, | |
| "rewards/rejected": -2.788590908050537, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 15.384527480517336, | |
| "learning_rate": 5.319148936170212e-07, | |
| "logits/chosen": -1.00022292137146, | |
| "logits/rejected": -1.0285676717758179, | |
| "logps/chosen": -0.27224716544151306, | |
| "logps/rejected": -0.2667531669139862, | |
| "loss": 3.0482, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -2.7224717140197754, | |
| "rewards/margins": -0.054939769208431244, | |
| "rewards/rejected": -2.667531967163086, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 16.663291386460095, | |
| "learning_rate": 6.382978723404255e-07, | |
| "logits/chosen": -1.0526418685913086, | |
| "logits/rejected": -1.0864059925079346, | |
| "logps/chosen": -0.27711886167526245, | |
| "logps/rejected": -0.28717002272605896, | |
| "loss": 3.0561, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -2.771188497543335, | |
| "rewards/margins": 0.10051168501377106, | |
| "rewards/rejected": -2.8717002868652344, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07466666666666667, | |
| "grad_norm": 15.627909071060607, | |
| "learning_rate": 7.446808510638297e-07, | |
| "logits/chosen": -1.0521974563598633, | |
| "logits/rejected": -1.0903512239456177, | |
| "logps/chosen": -0.26880407333374023, | |
| "logps/rejected": -0.2833861708641052, | |
| "loss": 3.032, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -2.6880409717559814, | |
| "rewards/margins": 0.14582109451293945, | |
| "rewards/rejected": -2.833861827850342, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08533333333333333, | |
| "grad_norm": 18.064971688821167, | |
| "learning_rate": 8.51063829787234e-07, | |
| "logits/chosen": -1.0548655986785889, | |
| "logits/rejected": -1.069032073020935, | |
| "logps/chosen": -0.27780383825302124, | |
| "logps/rejected": -0.2752782106399536, | |
| "loss": 3.0389, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -2.778038501739502, | |
| "rewards/margins": -0.025255998596549034, | |
| "rewards/rejected": -2.7527823448181152, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 15.909843739204323, | |
| "learning_rate": 9.574468085106384e-07, | |
| "logits/chosen": -1.0730093717575073, | |
| "logits/rejected": -1.0732877254486084, | |
| "logps/chosen": -0.27054473757743835, | |
| "logps/rejected": -0.28124722838401794, | |
| "loss": 3.0145, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.705447196960449, | |
| "rewards/margins": 0.10702502727508545, | |
| "rewards/rejected": -2.812472343444824, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 17.40081947428894, | |
| "learning_rate": 9.998747147528373e-07, | |
| "logits/chosen": -1.0844353437423706, | |
| "logits/rejected": -1.1193302869796753, | |
| "logps/chosen": -0.2774738669395447, | |
| "logps/rejected": -0.28697627782821655, | |
| "loss": 3.0146, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -2.774738311767578, | |
| "rewards/margins": 0.09502413868904114, | |
| "rewards/rejected": -2.869762420654297, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11733333333333333, | |
| "grad_norm": 15.576993262006905, | |
| "learning_rate": 9.991093100466482e-07, | |
| "logits/chosen": -1.0948281288146973, | |
| "logits/rejected": -1.1082046031951904, | |
| "logps/chosen": -0.2911163866519928, | |
| "logps/rejected": -0.29472410678863525, | |
| "loss": 3.0249, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.911163568496704, | |
| "rewards/margins": 0.03607722371816635, | |
| "rewards/rejected": -2.9472408294677734, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 16.260673735173633, | |
| "learning_rate": 9.976491676662678e-07, | |
| "logits/chosen": -1.106910228729248, | |
| "logits/rejected": -1.1165117025375366, | |
| "logps/chosen": -0.28884169459342957, | |
| "logps/rejected": -0.2888728082180023, | |
| "loss": 3.0111, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -2.8884167671203613, | |
| "rewards/margins": 0.000311434268951416, | |
| "rewards/rejected": -2.888728618621826, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13866666666666666, | |
| "grad_norm": 15.259045978055722, | |
| "learning_rate": 9.95496320064109e-07, | |
| "logits/chosen": -1.1103830337524414, | |
| "logits/rejected": -1.1367188692092896, | |
| "logps/chosen": -0.28479719161987305, | |
| "logps/rejected": -0.2901866137981415, | |
| "loss": 3.0013, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -2.8479714393615723, | |
| "rewards/margins": 0.05389442294836044, | |
| "rewards/rejected": -2.9018661975860596, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14933333333333335, | |
| "grad_norm": 15.800187299464394, | |
| "learning_rate": 9.926537639070456e-07, | |
| "logits/chosen": -1.12952721118927, | |
| "logits/rejected": -1.1671946048736572, | |
| "logps/chosen": -0.2909308671951294, | |
| "logps/rejected": -0.32388028502464294, | |
| "loss": 2.996, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -2.909308671951294, | |
| "rewards/margins": 0.3294942080974579, | |
| "rewards/rejected": -3.2388031482696533, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 17.626483067965456, | |
| "learning_rate": 9.891254559051884e-07, | |
| "logits/chosen": -1.212756633758545, | |
| "logits/rejected": -1.2210320234298706, | |
| "logps/chosen": -0.2985421419143677, | |
| "logps/rejected": -0.30502721667289734, | |
| "loss": 2.9708, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -2.985421657562256, | |
| "rewards/margins": 0.06485103070735931, | |
| "rewards/rejected": -3.0502724647521973, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.17066666666666666, | |
| "grad_norm": 17.71391454265527, | |
| "learning_rate": 9.849163073043223e-07, | |
| "logits/chosen": -1.1545735597610474, | |
| "logits/rejected": -1.2085789442062378, | |
| "logps/chosen": -0.29359039664268494, | |
| "logps/rejected": -0.30682289600372314, | |
| "loss": 2.9847, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -2.9359042644500732, | |
| "rewards/margins": 0.13232454657554626, | |
| "rewards/rejected": -3.0682289600372314, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18133333333333335, | |
| "grad_norm": 16.3328352772127, | |
| "learning_rate": 9.800321770496724e-07, | |
| "logits/chosen": -1.2470288276672363, | |
| "logits/rejected": -1.2831312417984009, | |
| "logps/chosen": -0.3038308620452881, | |
| "logps/rejected": -0.3224955201148987, | |
| "loss": 3.0144, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.038308620452881, | |
| "rewards/margins": 0.18664616346359253, | |
| "rewards/rejected": -3.2249550819396973, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 18.86796938836226, | |
| "learning_rate": 9.744798636305187e-07, | |
| "logits/chosen": -1.1841561794281006, | |
| "logits/rejected": -1.2237173318862915, | |
| "logps/chosen": -0.3177623152732849, | |
| "logps/rejected": -0.3389167785644531, | |
| "loss": 2.9578, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.1776235103607178, | |
| "rewards/margins": 0.21154406666755676, | |
| "rewards/rejected": -3.3891677856445312, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20266666666666666, | |
| "grad_norm": 15.728961940733296, | |
| "learning_rate": 9.68267095617003e-07, | |
| "logits/chosen": -1.2409876585006714, | |
| "logits/rejected": -1.2001683712005615, | |
| "logps/chosen": -0.317212849855423, | |
| "logps/rejected": -0.3224230408668518, | |
| "loss": 2.9979, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -3.172128677368164, | |
| "rewards/margins": 0.052101828157901764, | |
| "rewards/rejected": -3.2242302894592285, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 15.929749034356965, | |
| "learning_rate": 9.614025209023083e-07, | |
| "logits/chosen": -1.243082046508789, | |
| "logits/rejected": -1.2563217878341675, | |
| "logps/chosen": -0.31486523151397705, | |
| "logps/rejected": -0.36377206444740295, | |
| "loss": 2.9804, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -3.1486525535583496, | |
| "rewards/margins": 0.48906850814819336, | |
| "rewards/rejected": -3.637720823287964, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 18.523618354856197, | |
| "learning_rate": 9.538956946651815e-07, | |
| "logits/chosen": -1.287687063217163, | |
| "logits/rejected": -1.2784639596939087, | |
| "logps/chosen": -0.3346417546272278, | |
| "logps/rejected": -0.34726667404174805, | |
| "loss": 2.9939, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.346417188644409, | |
| "rewards/margins": 0.1262492835521698, | |
| "rewards/rejected": -3.4726665019989014, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.23466666666666666, | |
| "grad_norm": 18.401939572120774, | |
| "learning_rate": 9.457570660695539e-07, | |
| "logits/chosen": -1.244755506515503, | |
| "logits/rejected": -1.249961018562317, | |
| "logps/chosen": -0.3117735981941223, | |
| "logps/rejected": -0.3277347981929779, | |
| "loss": 2.9245, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.1177361011505127, | |
| "rewards/margins": 0.15961191058158875, | |
| "rewards/rejected": -3.277348041534424, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24533333333333332, | |
| "grad_norm": 17.42187895151266, | |
| "learning_rate": 9.369979637197774e-07, | |
| "logits/chosen": -1.2914206981658936, | |
| "logits/rejected": -1.2996666431427002, | |
| "logps/chosen": -0.3168942332267761, | |
| "logps/rejected": -0.3301650881767273, | |
| "loss": 2.9149, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -3.1689422130584717, | |
| "rewards/margins": 0.13270840048789978, | |
| "rewards/rejected": -3.3016505241394043, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 18.355579088710684, | |
| "learning_rate": 9.276305798917158e-07, | |
| "logits/chosen": -1.246483564376831, | |
| "logits/rejected": -1.266202688217163, | |
| "logps/chosen": -0.32255855202674866, | |
| "logps/rejected": -0.3426387906074524, | |
| "loss": 2.9557, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.2255859375, | |
| "rewards/margins": 0.20080196857452393, | |
| "rewards/rejected": -3.4263877868652344, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 17.621346858524813, | |
| "learning_rate": 9.176679535616476e-07, | |
| "logits/chosen": -1.247054934501648, | |
| "logits/rejected": -1.2541028261184692, | |
| "logps/chosen": -0.3470916152000427, | |
| "logps/rejected": -0.368325799703598, | |
| "loss": 2.9694, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -3.4709160327911377, | |
| "rewards/margins": 0.2123415172100067, | |
| "rewards/rejected": -3.683258056640625, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2773333333333333, | |
| "grad_norm": 19.942213640703905, | |
| "learning_rate": 9.071239522565976e-07, | |
| "logits/chosen": -1.3265608549118042, | |
| "logits/rejected": -1.3420588970184326, | |
| "logps/chosen": -0.3686971664428711, | |
| "logps/rejected": -0.4225061535835266, | |
| "loss": 3.0102, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -3.686971664428711, | |
| "rewards/margins": 0.5380896925926208, | |
| "rewards/rejected": -4.225061416625977, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 19.54726145620393, | |
| "learning_rate": 8.960132527513642e-07, | |
| "logits/chosen": -1.236342191696167, | |
| "logits/rejected": -1.2464872598648071, | |
| "logps/chosen": -0.3776538074016571, | |
| "logps/rejected": -0.373279333114624, | |
| "loss": 2.9791, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -3.776538133621216, | |
| "rewards/margins": -0.043744854629039764, | |
| "rewards/rejected": -3.732793092727661, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2986666666666667, | |
| "grad_norm": 19.40428586263726, | |
| "learning_rate": 8.8435132063911e-07, | |
| "logits/chosen": -1.2351807355880737, | |
| "logits/rejected": -1.2663322687149048, | |
| "logps/chosen": -0.3428255021572113, | |
| "logps/rejected": -0.3997129797935486, | |
| "loss": 2.9746, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -3.4282546043395996, | |
| "rewards/margins": 0.568874716758728, | |
| "rewards/rejected": -3.9971299171447754, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.30933333333333335, | |
| "grad_norm": 19.21728993268386, | |
| "learning_rate": 8.721543888039532e-07, | |
| "logits/chosen": -1.2016265392303467, | |
| "logits/rejected": -1.214748501777649, | |
| "logps/chosen": -0.33607161045074463, | |
| "logps/rejected": -0.3531486988067627, | |
| "loss": 2.9215, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.3607163429260254, | |
| "rewards/margins": 0.17077085375785828, | |
| "rewards/rejected": -3.531487226486206, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 19.767126676733028, | |
| "learning_rate": 8.594394348255237e-07, | |
| "logits/chosen": -1.2247461080551147, | |
| "logits/rejected": -1.2420233488082886, | |
| "logps/chosen": -0.3638969957828522, | |
| "logps/rejected": -0.3963164687156677, | |
| "loss": 3.037, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -3.638970136642456, | |
| "rewards/margins": 0.3241948187351227, | |
| "rewards/rejected": -3.963164806365967, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.33066666666666666, | |
| "grad_norm": 19.61247251775275, | |
| "learning_rate": 8.462241573469377e-07, | |
| "logits/chosen": -1.2525568008422852, | |
| "logits/rejected": -1.2552361488342285, | |
| "logps/chosen": -0.35086172819137573, | |
| "logps/rejected": -0.4083561301231384, | |
| "loss": 2.9396, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.508617877960205, | |
| "rewards/margins": 0.5749433040618896, | |
| "rewards/rejected": -4.083560943603516, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3413333333333333, | |
| "grad_norm": 20.251747085669653, | |
| "learning_rate": 8.325269514390834e-07, | |
| "logits/chosen": -1.207936406135559, | |
| "logits/rejected": -1.2139708995819092, | |
| "logps/chosen": -0.32931679487228394, | |
| "logps/rejected": -0.35854530334472656, | |
| "loss": 2.994, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.29316782951355, | |
| "rewards/margins": 0.2922849655151367, | |
| "rewards/rejected": -3.5854530334472656, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 19.996942830355515, | |
| "learning_rate": 8.183668829955111e-07, | |
| "logits/chosen": -1.1788911819458008, | |
| "logits/rejected": -1.208141803741455, | |
| "logps/chosen": -0.3463769853115082, | |
| "logps/rejected": -0.36146289110183716, | |
| "loss": 2.9678, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.4637699127197266, | |
| "rewards/margins": 0.15085917711257935, | |
| "rewards/rejected": -3.614629030227661, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.3626666666666667, | |
| "grad_norm": 18.25970826279287, | |
| "learning_rate": 8.037636621935684e-07, | |
| "logits/chosen": -1.2193751335144043, | |
| "logits/rejected": -1.2422373294830322, | |
| "logps/chosen": -0.3798995614051819, | |
| "logps/rejected": -0.38932663202285767, | |
| "loss": 2.9514, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -3.7989959716796875, | |
| "rewards/margins": 0.09427039325237274, | |
| "rewards/rejected": -3.893266201019287, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 19.672992426759237, | |
| "learning_rate": 7.887376160587213e-07, | |
| "logits/chosen": -1.1973555088043213, | |
| "logits/rejected": -1.2301228046417236, | |
| "logps/chosen": -0.38230299949645996, | |
| "logps/rejected": -0.40916380286216736, | |
| "loss": 2.909, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -3.8230299949645996, | |
| "rewards/margins": 0.26860785484313965, | |
| "rewards/rejected": -4.09163761138916, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 17.911399995564594, | |
| "learning_rate": 7.733096601702507e-07, | |
| "logits/chosen": -1.1881186962127686, | |
| "logits/rejected": -1.2008723020553589, | |
| "logps/chosen": -0.34471386671066284, | |
| "logps/rejected": -0.3660317063331604, | |
| "loss": 2.9854, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.447139024734497, | |
| "rewards/margins": 0.21317827701568604, | |
| "rewards/rejected": -3.6603171825408936, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39466666666666667, | |
| "grad_norm": 18.358466758766593, | |
| "learning_rate": 7.575012695477076e-07, | |
| "logits/chosen": -1.18712317943573, | |
| "logits/rejected": -1.2011723518371582, | |
| "logps/chosen": -0.3857135474681854, | |
| "logps/rejected": -0.4228752553462982, | |
| "loss": 2.9108, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.857135772705078, | |
| "rewards/margins": 0.37161707878112793, | |
| "rewards/rejected": -4.228753089904785, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.4053333333333333, | |
| "grad_norm": 22.989081667421914, | |
| "learning_rate": 7.413344487586542e-07, | |
| "logits/chosen": -1.243378758430481, | |
| "logits/rejected": -1.2453285455703735, | |
| "logps/chosen": -0.3679312467575073, | |
| "logps/rejected": -0.3737437129020691, | |
| "loss": 2.9705, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -3.679312229156494, | |
| "rewards/margins": 0.058125365525484085, | |
| "rewards/rejected": -3.7374374866485596, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 18.67773935679249, | |
| "learning_rate": 7.248317012892968e-07, | |
| "logits/chosen": -1.256320595741272, | |
| "logits/rejected": -1.2754420042037964, | |
| "logps/chosen": -0.4265909194946289, | |
| "logps/rejected": -0.5589956045150757, | |
| "loss": 2.947, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -4.265908718109131, | |
| "rewards/margins": 1.3240468502044678, | |
| "rewards/rejected": -5.589955806732178, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 18.689783800632924, | |
| "learning_rate": 7.08015998220647e-07, | |
| "logits/chosen": -1.2699859142303467, | |
| "logits/rejected": -1.2951858043670654, | |
| "logps/chosen": -0.3922530710697174, | |
| "logps/rejected": -0.4252193570137024, | |
| "loss": 2.9519, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -3.92253041267395, | |
| "rewards/margins": 0.3296629786491394, | |
| "rewards/rejected": -4.252193927764893, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43733333333333335, | |
| "grad_norm": 18.65842216703865, | |
| "learning_rate": 6.909107462538111e-07, | |
| "logits/chosen": -1.241351842880249, | |
| "logits/rejected": -1.2584552764892578, | |
| "logps/chosen": -0.39992719888687134, | |
| "logps/rejected": -0.4298134744167328, | |
| "loss": 2.9987, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.999271869659424, | |
| "rewards/margins": 0.2988627552986145, | |
| "rewards/rejected": -4.298134803771973, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 18.291353097077145, | |
| "learning_rate": 6.735397551289178e-07, | |
| "logits/chosen": -1.2319579124450684, | |
| "logits/rejected": -1.2307006120681763, | |
| "logps/chosen": -0.37881526350975037, | |
| "logps/rejected": -0.40964803099632263, | |
| "loss": 2.9793, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -3.7881526947021484, | |
| "rewards/margins": 0.30832812190055847, | |
| "rewards/rejected": -4.096480846405029, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.45866666666666667, | |
| "grad_norm": 22.707746897767183, | |
| "learning_rate": 6.559272044830316e-07, | |
| "logits/chosen": -1.2260886430740356, | |
| "logits/rejected": -1.2554783821105957, | |
| "logps/chosen": -0.37825000286102295, | |
| "logps/rejected": -0.41132181882858276, | |
| "loss": 3.0282, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.7824997901916504, | |
| "rewards/margins": 0.33071866631507874, | |
| "rewards/rejected": -4.113218784332275, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4693333333333333, | |
| "grad_norm": 19.197902589030974, | |
| "learning_rate": 6.380976101931879e-07, | |
| "logits/chosen": -1.2666683197021484, | |
| "logits/rejected": -1.255789875984192, | |
| "logps/chosen": -0.4547974169254303, | |
| "logps/rejected": -0.4583858549594879, | |
| "loss": 3.0647, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -4.547974109649658, | |
| "rewards/margins": 0.035884302109479904, | |
| "rewards/rejected": -4.583858489990234, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 18.843696932667417, | |
| "learning_rate": 6.200757902513962e-07, | |
| "logits/chosen": -1.2097585201263428, | |
| "logits/rejected": -1.2467796802520752, | |
| "logps/chosen": -0.39689359068870544, | |
| "logps/rejected": -0.46917811036109924, | |
| "loss": 2.9096, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.96893572807312, | |
| "rewards/margins": 0.7228449583053589, | |
| "rewards/rejected": -4.6917805671691895, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.49066666666666664, | |
| "grad_norm": 20.181834593605693, | |
| "learning_rate": 6.018868302191139e-07, | |
| "logits/chosen": -1.1870830059051514, | |
| "logits/rejected": -1.196030855178833, | |
| "logps/chosen": -0.3868725299835205, | |
| "logps/rejected": -0.4080945551395416, | |
| "loss": 2.9443, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.868724822998047, | |
| "rewards/margins": 0.21222031116485596, | |
| "rewards/rejected": -4.0809454917907715, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5013333333333333, | |
| "grad_norm": 20.821870588498854, | |
| "learning_rate": 5.835560483092742e-07, | |
| "logits/chosen": -1.2740999460220337, | |
| "logits/rejected": -1.2843743562698364, | |
| "logps/chosen": -0.4087337553501129, | |
| "logps/rejected": -0.4453648030757904, | |
| "loss": 2.9423, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -4.087337017059326, | |
| "rewards/margins": 0.3663104176521301, | |
| "rewards/rejected": -4.453647613525391, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 21.213279684684075, | |
| "learning_rate": 5.651089601444752e-07, | |
| "logits/chosen": -1.2432048320770264, | |
| "logits/rejected": -1.2379240989685059, | |
| "logps/chosen": -0.38207095861434937, | |
| "logps/rejected": -0.4122505784034729, | |
| "loss": 2.9824, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -3.820709705352783, | |
| "rewards/margins": 0.3017956614494324, | |
| "rewards/rejected": -4.122505187988281, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5226666666666666, | |
| "grad_norm": 20.797644682555266, | |
| "learning_rate": 5.465712432403811e-07, | |
| "logits/chosen": -1.2271026372909546, | |
| "logits/rejected": -1.2531237602233887, | |
| "logps/chosen": -0.4181506037712097, | |
| "logps/rejected": -0.44948238134384155, | |
| "loss": 2.9081, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -4.1815056800842285, | |
| "rewards/margins": 0.31331825256347656, | |
| "rewards/rejected": -4.494824409484863, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 23.47153560832651, | |
| "learning_rate": 5.279687012637798e-07, | |
| "logits/chosen": -1.2226316928863525, | |
| "logits/rejected": -1.2568190097808838, | |
| "logps/chosen": -0.4167971611022949, | |
| "logps/rejected": -0.44948896765708923, | |
| "loss": 2.9484, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -4.167971611022949, | |
| "rewards/margins": 0.3269180357456207, | |
| "rewards/rejected": -4.494889259338379, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 18.38315194030945, | |
| "learning_rate": 5.093272281150382e-07, | |
| "logits/chosen": -1.2616848945617676, | |
| "logits/rejected": -1.2981878519058228, | |
| "logps/chosen": -0.391302227973938, | |
| "logps/rejected": -0.434838205575943, | |
| "loss": 2.9307, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": -3.9130218029022217, | |
| "rewards/margins": 0.4353601336479187, | |
| "rewards/rejected": -4.348381996154785, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5546666666666666, | |
| "grad_norm": 21.27143313038359, | |
| "learning_rate": 4.906727718849618e-07, | |
| "logits/chosen": -1.2721519470214844, | |
| "logits/rejected": -1.2690547704696655, | |
| "logps/chosen": -0.3983103334903717, | |
| "logps/rejected": -0.4260452687740326, | |
| "loss": 2.8974, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -3.9831039905548096, | |
| "rewards/margins": 0.2773493230342865, | |
| "rewards/rejected": -4.260452747344971, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5653333333333334, | |
| "grad_norm": 20.34815938237593, | |
| "learning_rate": 4.7203129873622036e-07, | |
| "logits/chosen": -1.3212722539901733, | |
| "logits/rejected": -1.3238407373428345, | |
| "logps/chosen": -0.4332142770290375, | |
| "logps/rejected": -0.5484018325805664, | |
| "loss": 2.988, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -4.332143306732178, | |
| "rewards/margins": 1.1518752574920654, | |
| "rewards/rejected": -5.484018802642822, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 22.40687761363269, | |
| "learning_rate": 4.534287567596188e-07, | |
| "logits/chosen": -1.2679193019866943, | |
| "logits/rejected": -1.2968288660049438, | |
| "logps/chosen": -0.406146377325058, | |
| "logps/rejected": -0.4383586049079895, | |
| "loss": 2.9959, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -4.061463356018066, | |
| "rewards/margins": 0.3221224844455719, | |
| "rewards/rejected": -4.3835859298706055, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 20.942148890287285, | |
| "learning_rate": 4.348910398555249e-07, | |
| "logits/chosen": -1.2546627521514893, | |
| "logits/rejected": -1.2904515266418457, | |
| "logps/chosen": -0.4170510172843933, | |
| "logps/rejected": -0.459602415561676, | |
| "loss": 3.0177, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -4.1705098152160645, | |
| "rewards/margins": 0.4255140423774719, | |
| "rewards/rejected": -4.596024036407471, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5973333333333334, | |
| "grad_norm": 20.460111659042415, | |
| "learning_rate": 4.1644395169072575e-07, | |
| "logits/chosen": -1.235873818397522, | |
| "logits/rejected": -1.264666199684143, | |
| "logps/chosen": -0.3890388011932373, | |
| "logps/rejected": -0.3938077986240387, | |
| "loss": 3.0016, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.890387773513794, | |
| "rewards/margins": 0.04768957570195198, | |
| "rewards/rejected": -3.938077926635742, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 19.621676810019572, | |
| "learning_rate": 3.9811316978088615e-07, | |
| "logits/chosen": -1.2465546131134033, | |
| "logits/rejected": -1.2484326362609863, | |
| "logps/chosen": -0.4200409948825836, | |
| "logps/rejected": -0.4409112334251404, | |
| "loss": 2.9329, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -4.200409889221191, | |
| "rewards/margins": 0.20870289206504822, | |
| "rewards/rejected": -4.409112453460693, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6186666666666667, | |
| "grad_norm": 21.426484978013306, | |
| "learning_rate": 3.799242097486038e-07, | |
| "logits/chosen": -1.313291072845459, | |
| "logits/rejected": -1.3121254444122314, | |
| "logps/chosen": -0.45259523391723633, | |
| "logps/rejected": -0.48710495233535767, | |
| "loss": 2.9039, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -4.525952339172363, | |
| "rewards/margins": 0.3450973331928253, | |
| "rewards/rejected": -4.871049404144287, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6293333333333333, | |
| "grad_norm": 19.38424744063439, | |
| "learning_rate": 3.619023898068123e-07, | |
| "logits/chosen": -1.2923152446746826, | |
| "logits/rejected": -1.304937720298767, | |
| "logps/chosen": -0.41213172674179077, | |
| "logps/rejected": -0.46351146697998047, | |
| "loss": 2.9142, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -4.121316909790039, | |
| "rewards/margins": 0.5137982964515686, | |
| "rewards/rejected": -4.635115146636963, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 18.14370228888205, | |
| "learning_rate": 3.4407279551696846e-07, | |
| "logits/chosen": -1.2836002111434937, | |
| "logits/rejected": -1.309725046157837, | |
| "logps/chosen": -0.4251108169555664, | |
| "logps/rejected": -0.44700655341148376, | |
| "loss": 2.9205, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -4.251107215881348, | |
| "rewards/margins": 0.21895785629749298, | |
| "rewards/rejected": -4.470065593719482, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6506666666666666, | |
| "grad_norm": 18.496567614089848, | |
| "learning_rate": 3.2646024487108213e-07, | |
| "logits/chosen": -1.284744381904602, | |
| "logits/rejected": -1.2811096906661987, | |
| "logps/chosen": -0.4097173810005188, | |
| "logps/rejected": -0.4487836956977844, | |
| "loss": 2.9462, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -4.097173690795898, | |
| "rewards/margins": 0.39066314697265625, | |
| "rewards/rejected": -4.487837314605713, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6613333333333333, | |
| "grad_norm": 19.205847653386794, | |
| "learning_rate": 3.0908925374618887e-07, | |
| "logits/chosen": -1.26366126537323, | |
| "logits/rejected": -1.2661869525909424, | |
| "logps/chosen": -0.4183129668235779, | |
| "logps/rejected": -0.44671106338500977, | |
| "loss": 2.9514, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -4.18312931060791, | |
| "rewards/margins": 0.2839811444282532, | |
| "rewards/rejected": -4.467110633850098, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 20.240569305536415, | |
| "learning_rate": 2.91984001779353e-07, | |
| "logits/chosen": -1.212679386138916, | |
| "logits/rejected": -1.2541916370391846, | |
| "logps/chosen": -0.40088850259780884, | |
| "logps/rejected": -0.4241718351840973, | |
| "loss": 2.884, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -4.008884906768799, | |
| "rewards/margins": 0.23283371329307556, | |
| "rewards/rejected": -4.241718292236328, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6826666666666666, | |
| "grad_norm": 19.477994840917976, | |
| "learning_rate": 2.751682987107029e-07, | |
| "logits/chosen": -1.3080363273620605, | |
| "logits/rejected": -1.33048415184021, | |
| "logps/chosen": -0.4709502160549164, | |
| "logps/rejected": -0.48716697096824646, | |
| "loss": 3.0093, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -4.709502220153809, | |
| "rewards/margins": 0.16216790676116943, | |
| "rewards/rejected": -4.871669769287109, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 19.392408354902603, | |
| "learning_rate": 2.5866555124134577e-07, | |
| "logits/chosen": -1.247816801071167, | |
| "logits/rejected": -1.2808668613433838, | |
| "logps/chosen": -0.40554946660995483, | |
| "logps/rejected": -0.41697534918785095, | |
| "loss": 2.9083, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -4.055495262145996, | |
| "rewards/margins": 0.11425850540399551, | |
| "rewards/rejected": -4.169753551483154, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 24.255125187042516, | |
| "learning_rate": 2.424987304522924e-07, | |
| "logits/chosen": -1.2667433023452759, | |
| "logits/rejected": -1.2772916555404663, | |
| "logps/chosen": -0.4439450800418854, | |
| "logps/rejected": -0.44936904311180115, | |
| "loss": 3.0007, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -4.439450740814209, | |
| "rewards/margins": 0.05423973873257637, | |
| "rewards/rejected": -4.493690490722656, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7146666666666667, | |
| "grad_norm": 18.66997866326468, | |
| "learning_rate": 2.2669033982974944e-07, | |
| "logits/chosen": -1.2586668729782104, | |
| "logits/rejected": -1.2690373659133911, | |
| "logps/chosen": -0.4035263955593109, | |
| "logps/rejected": -0.4696914553642273, | |
| "loss": 2.9202, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -4.035264015197754, | |
| "rewards/margins": 0.6616507172584534, | |
| "rewards/rejected": -4.6969146728515625, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7253333333333334, | |
| "grad_norm": 19.194048967943004, | |
| "learning_rate": 2.1126238394127867e-07, | |
| "logits/chosen": -1.2616932392120361, | |
| "logits/rejected": -1.301735281944275, | |
| "logps/chosen": -0.4147927165031433, | |
| "logps/rejected": -0.4699038863182068, | |
| "loss": 2.9179, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -4.147927284240723, | |
| "rewards/margins": 0.5511118173599243, | |
| "rewards/rejected": -4.699038505554199, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 20.972738719646298, | |
| "learning_rate": 1.9623633780643155e-07, | |
| "logits/chosen": -1.206508994102478, | |
| "logits/rejected": -1.2490206956863403, | |
| "logps/chosen": -0.3681824505329132, | |
| "logps/rejected": -0.3938142657279968, | |
| "loss": 2.9287, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -3.6818244457244873, | |
| "rewards/margins": 0.25631803274154663, | |
| "rewards/rejected": -3.9381422996520996, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 19.685667725326027, | |
| "learning_rate": 1.8163311700448898e-07, | |
| "logits/chosen": -1.2758309841156006, | |
| "logits/rejected": -1.298648476600647, | |
| "logps/chosen": -0.41422510147094727, | |
| "logps/rejected": -0.4361799359321594, | |
| "loss": 2.9367, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -4.142251014709473, | |
| "rewards/margins": 0.2195480316877365, | |
| "rewards/rejected": -4.3617987632751465, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7573333333333333, | |
| "grad_norm": 18.702359306429475, | |
| "learning_rate": 1.674730485609166e-07, | |
| "logits/chosen": -1.2573009729385376, | |
| "logits/rejected": -1.2752147912979126, | |
| "logps/chosen": -0.4381086230278015, | |
| "logps/rejected": -0.47269415855407715, | |
| "loss": 3.0636, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -4.381086349487305, | |
| "rewards/margins": 0.34585532546043396, | |
| "rewards/rejected": -4.7269415855407715, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 19.15975497260745, | |
| "learning_rate": 1.537758426530622e-07, | |
| "logits/chosen": -1.2939916849136353, | |
| "logits/rejected": -1.2921050786972046, | |
| "logps/chosen": -0.4421107769012451, | |
| "logps/rejected": -0.4742702543735504, | |
| "loss": 2.9573, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -4.421108245849609, | |
| "rewards/margins": 0.3215946853160858, | |
| "rewards/rejected": -4.742702960968018, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7786666666666666, | |
| "grad_norm": 20.412109989540898, | |
| "learning_rate": 1.4056056517447634e-07, | |
| "logits/chosen": -1.2623586654663086, | |
| "logits/rejected": -1.2932734489440918, | |
| "logps/chosen": -0.4066384732723236, | |
| "logps/rejected": -0.44183507561683655, | |
| "loss": 2.9361, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -4.066384315490723, | |
| "rewards/margins": 0.3519664406776428, | |
| "rewards/rejected": -4.418351173400879, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7893333333333333, | |
| "grad_norm": 21.56479851024075, | |
| "learning_rate": 1.2784561119604682e-07, | |
| "logits/chosen": -1.2890852689743042, | |
| "logits/rejected": -1.2910048961639404, | |
| "logps/chosen": -0.4455583691596985, | |
| "logps/rejected": -0.5098209381103516, | |
| "loss": 2.96, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -4.455583572387695, | |
| "rewards/margins": 0.642626166343689, | |
| "rewards/rejected": -5.098209857940674, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 18.271145361200055, | |
| "learning_rate": 1.156486793608899e-07, | |
| "logits/chosen": -1.3208215236663818, | |
| "logits/rejected": -1.3235687017440796, | |
| "logps/chosen": -0.41836491227149963, | |
| "logps/rejected": -0.45064491033554077, | |
| "loss": 2.9402, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -4.183648586273193, | |
| "rewards/margins": 0.32280001044273376, | |
| "rewards/rejected": -4.506449222564697, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8106666666666666, | |
| "grad_norm": 19.953182254510097, | |
| "learning_rate": 1.0398674724863581e-07, | |
| "logits/chosen": -1.3136857748031616, | |
| "logits/rejected": -1.3123706579208374, | |
| "logps/chosen": -0.4310145974159241, | |
| "logps/rejected": -0.49033123254776, | |
| "loss": 2.9092, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -4.310145378112793, | |
| "rewards/margins": 0.5931666493415833, | |
| "rewards/rejected": -4.9033122062683105, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8213333333333334, | |
| "grad_norm": 20.59979411954975, | |
| "learning_rate": 9.287604774340235e-08, | |
| "logits/chosen": -1.2864879369735718, | |
| "logits/rejected": -1.3099608421325684, | |
| "logps/chosen": -0.4408188760280609, | |
| "logps/rejected": -0.4660406708717346, | |
| "loss": 2.9032, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -4.408188819885254, | |
| "rewards/margins": 0.2522173821926117, | |
| "rewards/rejected": -4.660406112670898, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 21.750100354501434, | |
| "learning_rate": 8.233204643835234e-08, | |
| "logits/chosen": -1.2784446477890015, | |
| "logits/rejected": -1.3241978883743286, | |
| "logps/chosen": -0.41723695397377014, | |
| "logps/rejected": -0.48806482553482056, | |
| "loss": 2.8978, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -4.172369480133057, | |
| "rewards/margins": 0.708278477191925, | |
| "rewards/rejected": -4.880647659301758, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8426666666666667, | |
| "grad_norm": 20.384595741085338, | |
| "learning_rate": 7.236942010828429e-08, | |
| "logits/chosen": -1.3349159955978394, | |
| "logits/rejected": -1.3055683374404907, | |
| "logps/chosen": -0.43574801087379456, | |
| "logps/rejected": -0.4621052145957947, | |
| "loss": 2.9866, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -4.357480525970459, | |
| "rewards/margins": 0.26357167959213257, | |
| "rewards/rejected": -4.621052265167236, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 18.501766671703546, | |
| "learning_rate": 6.300203628022271e-08, | |
| "logits/chosen": -1.248923897743225, | |
| "logits/rejected": -1.2598222494125366, | |
| "logps/chosen": -0.38643878698349, | |
| "logps/rejected": -0.39453864097595215, | |
| "loss": 2.9395, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -3.8643882274627686, | |
| "rewards/margins": 0.08099845796823502, | |
| "rewards/rejected": -3.9453868865966797, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 19.839337640876096, | |
| "learning_rate": 5.42429339304461e-08, | |
| "logits/chosen": -1.2822444438934326, | |
| "logits/rejected": -1.3219239711761475, | |
| "logps/chosen": -0.43799424171447754, | |
| "logps/rejected": -0.4629386067390442, | |
| "loss": 3.0149, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -4.379942417144775, | |
| "rewards/margins": 0.24944381415843964, | |
| "rewards/rejected": -4.629385948181152, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8746666666666667, | |
| "grad_norm": 18.208971956367737, | |
| "learning_rate": 4.610430533481857e-08, | |
| "logits/chosen": -1.266296148300171, | |
| "logits/rejected": -1.303065538406372, | |
| "logps/chosen": -0.4157203137874603, | |
| "logps/rejected": -0.47199106216430664, | |
| "loss": 2.8876, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -4.15720272064209, | |
| "rewards/margins": 0.5627071857452393, | |
| "rewards/rejected": -4.719910144805908, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8853333333333333, | |
| "grad_norm": 19.131190141361028, | |
| "learning_rate": 3.859747909769162e-08, | |
| "logits/chosen": -1.2796621322631836, | |
| "logits/rejected": -1.315230131149292, | |
| "logps/chosen": -0.4193892478942871, | |
| "logps/rejected": -0.48347288370132446, | |
| "loss": 2.8209, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -4.193892478942871, | |
| "rewards/margins": 0.6408361196517944, | |
| "rewards/rejected": -4.834728240966797, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 19.4485490322355, | |
| "learning_rate": 3.173290438299697e-08, | |
| "logits/chosen": -1.295986294746399, | |
| "logits/rejected": -1.3151133060455322, | |
| "logps/chosen": -0.4371556341648102, | |
| "logps/rejected": -0.4686927795410156, | |
| "loss": 2.96, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -4.371556758880615, | |
| "rewards/margins": 0.31537097692489624, | |
| "rewards/rejected": -4.686927318572998, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 20.219307796360535, | |
| "learning_rate": 2.5520136369481194e-08, | |
| "logits/chosen": -1.2928402423858643, | |
| "logits/rejected": -1.3205971717834473, | |
| "logps/chosen": -0.46112942695617676, | |
| "logps/rejected": -0.51966392993927, | |
| "loss": 2.948, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -4.611294746398926, | |
| "rewards/margins": 0.5853451490402222, | |
| "rewards/rejected": -5.196639060974121, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9173333333333333, | |
| "grad_norm": 20.610456554777507, | |
| "learning_rate": 1.996782295032745e-08, | |
| "logits/chosen": -1.3009055852890015, | |
| "logits/rejected": -1.3030388355255127, | |
| "logps/chosen": -0.40775948762893677, | |
| "logps/rejected": -0.4260484576225281, | |
| "loss": 2.9981, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -4.07759428024292, | |
| "rewards/margins": 0.18288996815681458, | |
| "rewards/rejected": -4.260484218597412, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.928, | |
| "grad_norm": 20.47490770082739, | |
| "learning_rate": 1.508369269567783e-08, | |
| "logits/chosen": -1.2779980897903442, | |
| "logits/rejected": -1.286833643913269, | |
| "logps/chosen": -0.4154781401157379, | |
| "logps/rejected": -0.45970821380615234, | |
| "loss": 2.9325, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -4.154781341552734, | |
| "rewards/margins": 0.44230085611343384, | |
| "rewards/rejected": -4.597081661224365, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9386666666666666, | |
| "grad_norm": 21.154945951171282, | |
| "learning_rate": 1.0874544094811422e-08, | |
| "logits/chosen": -1.3095712661743164, | |
| "logits/rejected": -1.328394889831543, | |
| "logps/chosen": -0.46411657333374023, | |
| "logps/rejected": -0.5013660192489624, | |
| "loss": 2.9036, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -4.641165733337402, | |
| "rewards/margins": 0.3724942207336426, | |
| "rewards/rejected": -5.013659954071045, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9493333333333334, | |
| "grad_norm": 19.52631450128395, | |
| "learning_rate": 7.346236092954316e-09, | |
| "logits/chosen": -1.316333532333374, | |
| "logits/rejected": -1.3282887935638428, | |
| "logps/chosen": -0.4388408064842224, | |
| "logps/rejected": -0.479898065328598, | |
| "loss": 2.9824, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -4.388408184051514, | |
| "rewards/margins": 0.4105720520019531, | |
| "rewards/rejected": -4.798980712890625, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 20.824863365709117, | |
| "learning_rate": 4.50367993589107e-09, | |
| "logits/chosen": -1.316052794456482, | |
| "logits/rejected": -1.3359907865524292, | |
| "logps/chosen": -0.4431839883327484, | |
| "logps/rejected": -0.4758357107639313, | |
| "loss": 3.0019, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -4.431839942932129, | |
| "rewards/margins": 0.32651659846305847, | |
| "rewards/rejected": -4.758356094360352, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9706666666666667, | |
| "grad_norm": 20.63831130072036, | |
| "learning_rate": 2.3508323337321224e-09, | |
| "logits/chosen": -1.3009384870529175, | |
| "logits/rejected": -1.3190171718597412, | |
| "logps/chosen": -0.4200662672519684, | |
| "logps/rejected": -0.4521716237068176, | |
| "loss": 2.9499, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -4.200663089752197, | |
| "rewards/margins": 0.3210526704788208, | |
| "rewards/rejected": -4.5217156410217285, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9813333333333333, | |
| "grad_norm": 22.995746894078476, | |
| "learning_rate": 8.906899533517864e-10, | |
| "logits/chosen": -1.2918171882629395, | |
| "logits/rejected": -1.2942984104156494, | |
| "logps/chosen": -0.4635673463344574, | |
| "logps/rejected": -0.4919341504573822, | |
| "loss": 2.9469, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -4.635673999786377, | |
| "rewards/margins": 0.283668577671051, | |
| "rewards/rejected": -4.919342041015625, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.992, | |
| "grad_norm": 23.572574549303816, | |
| "learning_rate": 1.252852471625987e-10, | |
| "logits/chosen": -1.2587887048721313, | |
| "logits/rejected": -1.2847938537597656, | |
| "logps/chosen": -0.4143844246864319, | |
| "logps/rejected": -0.4563868045806885, | |
| "loss": 2.9443, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -4.143843650817871, | |
| "rewards/margins": 0.42002400755882263, | |
| "rewards/rejected": -4.563868522644043, | |
| "step": 465 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 468, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 181044392755200.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |