{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.996,
  "eval_steps": 500,
  "global_step": 83,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.012,
      "grad_norm": 0.8640882968902588,
      "learning_rate": 5.555555555555555e-08,
      "logits/chosen": -1.495650291442871,
      "logits/rejected": -1.3535889387130737,
      "logps/chosen": -0.10173828899860382,
      "logps/rejected": -0.08792766183614731,
      "loss": 0.8717,
      "rewards/accuracies": 0.3333333730697632,
      "rewards/chosen": -0.20347657799720764,
      "rewards/margins": -0.027621246874332428,
      "rewards/rejected": -0.17585532367229462,
      "step": 1
    },
    {
      "epoch": 0.024,
      "grad_norm": 0.979682981967926,
      "learning_rate": 1.111111111111111e-07,
      "logits/chosen": -1.443913221359253,
      "logits/rejected": -1.3641024827957153,
      "logps/chosen": -0.10752908140420914,
      "logps/rejected": -0.08240076899528503,
      "loss": 0.8846,
      "rewards/accuracies": 0.2187500149011612,
      "rewards/chosen": -0.21505816280841827,
      "rewards/margins": -0.0502566322684288,
      "rewards/rejected": -0.16480153799057007,
      "step": 2
    },
    {
      "epoch": 0.036,
      "grad_norm": 0.9309831857681274,
      "learning_rate": 1.6666666666666665e-07,
      "logits/chosen": -1.4732969999313354,
      "logits/rejected": -1.376213550567627,
      "logps/chosen": -0.10481980443000793,
      "logps/rejected": -0.07933054119348526,
      "loss": 0.8848,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.20963960886001587,
      "rewards/margins": -0.050978537648916245,
      "rewards/rejected": -0.15866108238697052,
      "step": 3
    },
    {
      "epoch": 0.048,
      "grad_norm": 0.9682196974754333,
      "learning_rate": 2.222222222222222e-07,
      "logits/chosen": -1.5285106897354126,
      "logits/rejected": -1.3950246572494507,
      "logps/chosen": -0.11225953698158264,
      "logps/rejected": -0.08694332838058472,
      "loss": 0.8849,
      "rewards/accuracies": 0.3229166865348816,
      "rewards/chosen": -0.22451907396316528,
      "rewards/margins": -0.05063238739967346,
      "rewards/rejected": -0.17388665676116943,
      "step": 4
    },
    {
      "epoch": 0.06,
      "grad_norm": 1.0820338726043701,
      "learning_rate": 2.7777777777777776e-07,
      "logits/chosen": -1.494052767753601,
      "logits/rejected": -1.3356661796569824,
      "logps/chosen": -0.12428013980388641,
      "logps/rejected": -0.09185128659009933,
      "loss": 0.8968,
      "rewards/accuracies": 0.322916716337204,
      "rewards/chosen": -0.24856027960777283,
      "rewards/margins": -0.06485769897699356,
      "rewards/rejected": -0.18370257318019867,
      "step": 5
    },
    {
      "epoch": 0.072,
      "grad_norm": 1.0573477745056152,
      "learning_rate": 3.333333333333333e-07,
      "logits/chosen": -1.4825295209884644,
      "logits/rejected": -1.3897716999053955,
      "logps/chosen": -0.11380095779895782,
      "logps/rejected": -0.08133920282125473,
      "loss": 0.8938,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.22760191559791565,
      "rewards/margins": -0.06492353230714798,
      "rewards/rejected": -0.16267840564250946,
      "step": 6
    },
    {
      "epoch": 0.084,
      "grad_norm": 0.9294664263725281,
      "learning_rate": 3.888888888888889e-07,
      "logits/chosen": -1.5021216869354248,
      "logits/rejected": -1.3295238018035889,
      "logps/chosen": -0.099081851541996,
      "logps/rejected": -0.08119318634271622,
      "loss": 0.8761,
      "rewards/accuracies": 0.2916666567325592,
      "rewards/chosen": -0.198163703083992,
      "rewards/margins": -0.03577733412384987,
      "rewards/rejected": -0.16238637268543243,
      "step": 7
    },
    {
      "epoch": 0.096,
      "grad_norm": 1.0329580307006836,
      "learning_rate": 4.444444444444444e-07,
      "logits/chosen": -1.5048794746398926,
      "logits/rejected": -1.377795934677124,
      "logps/chosen": -0.12124787271022797,
      "logps/rejected": -0.08464659005403519,
      "loss": 0.9015,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": -0.24249574542045593,
      "rewards/margins": -0.07320256531238556,
      "rewards/rejected": -0.16929318010807037,
      "step": 8
    },
    {
      "epoch": 0.108,
      "grad_norm": 0.9767515063285828,
      "learning_rate": 5e-07,
      "logits/chosen": -1.5186768770217896,
      "logits/rejected": -1.3877286911010742,
      "logps/chosen": -0.10737170279026031,
      "logps/rejected": -0.0823729932308197,
      "loss": 0.8847,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.21474340558052063,
      "rewards/margins": -0.04999742656946182,
      "rewards/rejected": -0.1647459864616394,
      "step": 9
    },
    {
      "epoch": 0.12,
      "grad_norm": 0.9653743505477905,
      "learning_rate": 4.997747415511704e-07,
      "logits/chosen": -1.4915320873260498,
      "logits/rejected": -1.3450301885604858,
      "logps/chosen": -0.10813666880130768,
      "logps/rejected": -0.08507634699344635,
      "loss": 0.8822,
      "rewards/accuracies": 0.2291666716337204,
      "rewards/chosen": -0.21627333760261536,
      "rewards/margins": -0.04612065851688385,
      "rewards/rejected": -0.1701526939868927,
      "step": 10
    },
    {
      "epoch": 0.132,
      "grad_norm": 0.9533908367156982,
      "learning_rate": 4.990993721356315e-07,
      "logits/chosen": -1.4848368167877197,
      "logits/rejected": -1.3539990186691284,
      "logps/chosen": -0.11860796809196472,
      "logps/rejected": -0.08763512969017029,
      "loss": 0.8931,
      "rewards/accuracies": 0.25,
      "rewards/chosen": -0.23721593618392944,
      "rewards/margins": -0.061945684254169464,
      "rewards/rejected": -0.17527025938034058,
      "step": 11
    },
    {
      "epoch": 0.144,
      "grad_norm": 0.9020726084709167,
      "learning_rate": 4.979751088147191e-07,
      "logits/chosen": -1.4897594451904297,
      "logits/rejected": -1.3646252155303955,
      "logps/chosen": -0.09957993775606155,
      "logps/rejected": -0.08218874782323837,
      "loss": 0.8759,
      "rewards/accuracies": 0.3958333432674408,
      "rewards/chosen": -0.1991598755121231,
      "rewards/margins": -0.034782394766807556,
      "rewards/rejected": -0.16437749564647675,
      "step": 12
    },
    {
      "epoch": 0.156,
      "grad_norm": 1.0148029327392578,
      "learning_rate": 4.964039775869271e-07,
      "logits/chosen": -1.4809292554855347,
      "logits/rejected": -1.3655236959457397,
      "logps/chosen": -0.11366236209869385,
      "logps/rejected": -0.0875682383775711,
      "loss": 0.8868,
      "rewards/accuracies": 0.3645833432674408,
      "rewards/chosen": -0.2273247241973877,
      "rewards/margins": -0.05218825116753578,
      "rewards/rejected": -0.1751364767551422,
      "step": 13
    },
    {
      "epoch": 0.168,
      "grad_norm": 1.334598422050476,
      "learning_rate": 4.943888097369216e-07,
      "logits/chosen": -1.4804601669311523,
      "logits/rejected": -1.3518431186676025,
      "logps/chosen": -0.13359910249710083,
      "logps/rejected": -0.08674132823944092,
      "loss": 0.9236,
      "rewards/accuracies": 0.2395833432674408,
      "rewards/chosen": -0.26719820499420166,
      "rewards/margins": -0.09371551126241684,
      "rewards/rejected": -0.17348265647888184,
      "step": 14
    },
    {
      "epoch": 0.18,
      "grad_norm": 0.9834568500518799,
      "learning_rate": 4.919332367333748e-07,
      "logits/chosen": -1.4974645376205444,
      "logits/rejected": -1.3400018215179443,
      "logps/chosen": -0.10907550156116486,
      "logps/rejected": -0.08122590184211731,
      "loss": 0.8879,
      "rewards/accuracies": 0.21875,
      "rewards/chosen": -0.2181510031223297,
      "rewards/margins": -0.05569921433925629,
      "rewards/rejected": -0.16245180368423462,
      "step": 15
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.9361069798469543,
      "learning_rate": 4.890416836848127e-07,
      "logits/chosen": -1.4610369205474854,
      "logits/rejected": -1.369497299194336,
      "logps/chosen": -0.10335300862789154,
      "logps/rejected": -0.08525583893060684,
      "loss": 0.8765,
      "rewards/accuracies": 0.3541666865348816,
      "rewards/chosen": -0.20670601725578308,
      "rewards/margins": -0.03619435429573059,
      "rewards/rejected": -0.17051167786121368,
      "step": 16
    },
    {
      "epoch": 0.204,
      "grad_norm": 0.910740077495575,
      "learning_rate": 4.85719361365271e-07,
      "logits/chosen": -1.5087649822235107,
      "logits/rejected": -1.3652905225753784,
      "logps/chosen": -0.09960392862558365,
      "logps/rejected": -0.08507402241230011,
      "loss": 0.8719,
      "rewards/accuracies": 0.3541666865348816,
      "rewards/chosen": -0.1992078572511673,
      "rewards/margins": -0.029059793800115585,
      "rewards/rejected": -0.17014804482460022,
      "step": 17
    },
    {
      "epoch": 0.216,
      "grad_norm": 0.9233217239379883,
      "learning_rate": 4.819722568241273e-07,
      "logits/chosen": -1.4695565700531006,
      "logits/rejected": -1.336183786392212,
      "logps/chosen": -0.10343371331691742,
      "logps/rejected": -0.0857667475938797,
      "loss": 0.8769,
      "rewards/accuracies": 0.4166666865348816,
      "rewards/chosen": -0.20686742663383484,
      "rewards/margins": -0.03533393144607544,
      "rewards/rejected": -0.1715334951877594,
      "step": 18
    },
    {
      "epoch": 0.228,
      "grad_norm": 0.8909419775009155,
      "learning_rate": 4.778071225970339e-07,
      "logits/chosen": -1.4759807586669922,
      "logits/rejected": -1.3495370149612427,
      "logps/chosen": -0.10235996544361115,
      "logps/rejected": -0.0851350948214531,
      "loss": 0.8757,
      "rewards/accuracies": 0.3229166567325592,
      "rewards/chosen": -0.2047199308872223,
      "rewards/margins": -0.03444972261786461,
      "rewards/rejected": -0.1702701896429062,
      "step": 19
    },
    {
      "epoch": 0.24,
      "grad_norm": 0.9479613304138184,
      "learning_rate": 4.732314645373921e-07,
      "logits/chosen": -1.4673627614974976,
      "logits/rejected": -1.3651877641677856,
      "logps/chosen": -0.10552840679883957,
      "logps/rejected": -0.07911674678325653,
      "loss": 0.886,
      "rewards/accuracies": 0.28125,
      "rewards/chosen": -0.21105681359767914,
      "rewards/margins": -0.05282333493232727,
      "rewards/rejected": -0.15823349356651306,
      "step": 20
    },
    {
      "epoch": 0.252,
      "grad_norm": 0.854266345500946,
      "learning_rate": 4.68253528290297e-07,
      "logits/chosen": -1.4877924919128418,
      "logits/rejected": -1.324515461921692,
      "logps/chosen": -0.1002732366323471,
      "logps/rejected": -0.09079625457525253,
      "loss": 0.8664,
      "rewards/accuracies": 0.3645833432674408,
      "rewards/chosen": -0.2005464732646942,
      "rewards/margins": -0.018953965976834297,
      "rewards/rejected": -0.18159250915050507,
      "step": 21
    },
    {
      "epoch": 0.264,
      "grad_norm": 0.9627916812896729,
      "learning_rate": 4.6288228443332776e-07,
      "logits/chosen": -1.466447114944458,
      "logits/rejected": -1.3485172986984253,
      "logps/chosen": -0.10300128906965256,
      "logps/rejected": -0.08484764397144318,
      "loss": 0.8763,
      "rewards/accuracies": 0.3333333730697632,
      "rewards/chosen": -0.20600257813930511,
      "rewards/margins": -0.03630730137228966,
      "rewards/rejected": -0.16969528794288635,
      "step": 22
    },
    {
      "epoch": 0.276,
      "grad_norm": 0.8689938187599182,
      "learning_rate": 4.571274123109605e-07,
      "logits/chosen": -1.4124252796173096,
      "logits/rejected": -1.3001394271850586,
      "logps/chosen": -0.10588695108890533,
      "logps/rejected": -0.08438257873058319,
      "loss": 0.8809,
      "rewards/accuracies": 0.2395833283662796,
      "rewards/chosen": -0.21177390217781067,
      "rewards/margins": -0.043008752167224884,
      "rewards/rejected": -0.16876515746116638,
      "step": 23
    },
    {
      "epoch": 0.288,
      "grad_norm": 0.9801927208900452,
      "learning_rate": 4.5099928259173514e-07,
      "logits/chosen": -1.4522674083709717,
      "logits/rejected": -1.3461543321609497,
      "logps/chosen": -0.1134054884314537,
      "logps/rejected": -0.08870639652013779,
      "loss": 0.886,
      "rewards/accuracies": 0.2499999850988388,
      "rewards/chosen": -0.2268109768629074,
      "rewards/margins": -0.049398161470890045,
      "rewards/rejected": -0.17741279304027557,
      "step": 24
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.9185568690299988,
      "learning_rate": 4.4450893857960984e-07,
      "logits/chosen": -1.4601349830627441,
      "logits/rejected": -1.273384928703308,
      "logps/chosen": -0.10715562850236893,
      "logps/rejected": -0.09089934825897217,
      "loss": 0.8746,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.21431125700473785,
      "rewards/margins": -0.03251257538795471,
      "rewards/rejected": -0.18179869651794434,
      "step": 25
    },
    {
      "epoch": 0.312,
      "grad_norm": 0.8864983320236206,
      "learning_rate": 4.3766807631318105e-07,
      "logits/chosen": -1.438947081565857,
      "logits/rejected": -1.3086212873458862,
      "logps/chosen": -0.10850708931684494,
      "logps/rejected": -0.0892128050327301,
      "loss": 0.8781,
      "rewards/accuracies": 0.3229166865348816,
      "rewards/chosen": -0.21701417863368988,
      "rewards/margins": -0.038588590919971466,
      "rewards/rejected": -0.1784256100654602,
      "step": 26
    },
    {
      "epoch": 0.324,
      "grad_norm": 0.9518214464187622,
      "learning_rate": 4.3048902348863106e-07,
      "logits/chosen": -1.4363845586776733,
      "logits/rejected": -1.3129128217697144,
      "logps/chosen": -0.10809854418039322,
      "logps/rejected": -0.08249183744192123,
      "loss": 0.885,
      "rewards/accuracies": 0.2812500298023224,
      "rewards/chosen": -0.21619708836078644,
      "rewards/margins": -0.05121342092752457,
      "rewards/rejected": -0.16498367488384247,
      "step": 27
    },
    {
      "epoch": 0.336,
      "grad_norm": 0.9291889667510986,
      "learning_rate": 4.2298471724438653e-07,
      "logits/chosen": -1.4329116344451904,
      "logits/rejected": -1.3157219886779785,
      "logps/chosen": -0.10641689598560333,
      "logps/rejected": -0.0836237370967865,
      "loss": 0.8825,
      "rewards/accuracies": 0.3541666567325592,
      "rewards/chosen": -0.21283379197120667,
      "rewards/margins": -0.04558631405234337,
      "rewards/rejected": -0.167247474193573,
      "step": 28
    },
    {
      "epoch": 0.348,
      "grad_norm": 0.8883063197135925,
      "learning_rate": 4.151686808475203e-07,
      "logits/chosen": -1.4277637004852295,
      "logits/rejected": -1.2657394409179688,
      "logps/chosen": -0.1044892817735672,
      "logps/rejected": -0.08842873573303223,
      "loss": 0.8741,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -0.2089785635471344,
      "rewards/margins": -0.03212107717990875,
      "rewards/rejected": -0.17685747146606445,
      "step": 29
    },
    {
      "epoch": 0.36,
      "grad_norm": 0.9316148161888123,
      "learning_rate": 4.070549993239106e-07,
      "logits/chosen": -1.3881316184997559,
      "logits/rejected": -1.2497737407684326,
      "logps/chosen": -0.11017285287380219,
      "logps/rejected": -0.0868031308054924,
      "loss": 0.8842,
      "rewards/accuracies": 0.3645833432674408,
      "rewards/chosen": -0.22034570574760437,
      "rewards/margins": -0.046739429235458374,
      "rewards/rejected": -0.1736062616109848,
      "step": 30
    },
    {
      "epoch": 0.372,
      "grad_norm": 0.8768561482429504,
      "learning_rate": 3.9865829407607166e-07,
      "logits/chosen": -1.4147083759307861,
      "logits/rejected": -1.2981321811676025,
      "logps/chosen": -0.10104362666606903,
      "logps/rejected": -0.09066756814718246,
      "loss": 0.8673,
      "rewards/accuracies": 0.3437500298023224,
      "rewards/chosen": -0.20208725333213806,
      "rewards/margins": -0.020752109587192535,
      "rewards/rejected": -0.18133513629436493,
      "step": 31
    },
    {
      "epoch": 0.384,
      "grad_norm": 1.0077239274978638,
      "learning_rate": 3.8999369653439883e-07,
      "logits/chosen": -1.4155137538909912,
      "logits/rejected": -1.2743993997573853,
      "logps/chosen": -0.10902610421180725,
      "logps/rejected": -0.0863211378455162,
      "loss": 0.8827,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.2180522084236145,
      "rewards/margins": -0.0454099103808403,
      "rewards/rejected": -0.1726422756910324,
      "step": 32
    },
    {
      "epoch": 0.396,
      "grad_norm": 0.9207706451416016,
      "learning_rate": 3.810768208893079e-07,
      "logits/chosen": -1.3758422136306763,
      "logits/rejected": -1.2915358543395996,
      "logps/chosen": -0.1034766435623169,
      "logps/rejected": -0.07977995276451111,
      "loss": 0.883,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.2069532871246338,
      "rewards/margins": -0.047393374145030975,
      "rewards/rejected": -0.15955990552902222,
      "step": 33
    },
    {
      "epoch": 0.408,
      "grad_norm": 0.9844294190406799,
      "learning_rate": 3.7192373595340865e-07,
      "logits/chosen": -1.442295789718628,
      "logits/rejected": -1.2863086462020874,
      "logps/chosen": -0.10710459202528,
      "logps/rejected": -0.08674141019582748,
      "loss": 0.8796,
      "rewards/accuracies": 0.3020833432674408,
      "rewards/chosen": -0.21420918405056,
      "rewards/margins": -0.04072638228535652,
      "rewards/rejected": -0.17348282039165497,
      "step": 34
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.9682800769805908,
      "learning_rate": 3.625509362044183e-07,
      "logits/chosen": -1.3701705932617188,
      "logits/rejected": -1.2656924724578857,
      "logps/chosen": -0.11000403016805649,
      "logps/rejected": -0.08980907499790192,
      "loss": 0.8803,
      "rewards/accuracies": 0.3854166567325592,
      "rewards/chosen": -0.22000806033611298,
      "rewards/margins": -0.04038992151618004,
      "rewards/rejected": -0.17961814999580383,
      "step": 35
    },
    {
      "epoch": 0.432,
      "grad_norm": 0.9004039168357849,
      "learning_rate": 3.529753120609982e-07,
      "logits/chosen": -1.4099225997924805,
      "logits/rejected": -1.252682089805603,
      "logps/chosen": -0.09707480669021606,
      "logps/rejected": -0.08859608322381973,
      "loss": 0.8653,
      "rewards/accuracies": 0.4479166865348816,
      "rewards/chosen": -0.19414961338043213,
      "rewards/margins": -0.016957445070147514,
      "rewards/rejected": -0.17719216644763947,
      "step": 36
    },
    {
      "epoch": 0.444,
      "grad_norm": 0.9568714499473572,
      "learning_rate": 3.4321411944507714e-07,
      "logits/chosen": -1.3612836599349976,
      "logits/rejected": -1.2257184982299805,
      "logps/chosen": -0.10623904317617416,
      "logps/rejected": -0.09100136905908585,
      "loss": 0.8734,
      "rewards/accuracies": 0.4166666567325592,
      "rewards/chosen": -0.21247808635234833,
      "rewards/margins": -0.030475351959466934,
      "rewards/rejected": -0.1820027381181717,
      "step": 37
    },
    {
      "epoch": 0.456,
      "grad_norm": 0.8658465147018433,
      "learning_rate": 3.332849486855144e-07,
      "logits/chosen": -1.381141185760498,
      "logits/rejected": -1.2495661973953247,
      "logps/chosen": -0.09047922492027283,
      "logps/rejected": -0.07975338399410248,
      "loss": 0.8676,
      "rewards/accuracies": 0.3958333432674408,
      "rewards/chosen": -0.18095844984054565,
      "rewards/margins": -0.021451696753501892,
      "rewards/rejected": -0.15950676798820496,
      "step": 38
    },
    {
      "epoch": 0.468,
      "grad_norm": 0.9716626405715942,
      "learning_rate": 3.2320569281913754e-07,
      "logits/chosen": -1.3790628910064697,
      "logits/rejected": -1.251636028289795,
      "logps/chosen": -0.10486049205064774,
      "logps/rejected": -0.08277000486850739,
      "loss": 0.883,
      "rewards/accuracies": 0.3437500298023224,
      "rewards/chosen": -0.20972098410129547,
      "rewards/margins": -0.0441809706389904,
      "rewards/rejected": -0.16554000973701477,
      "step": 39
    },
    {
      "epoch": 0.48,
      "grad_norm": 0.948078989982605,
      "learning_rate": 3.129945153462813e-07,
      "logits/chosen": -1.3937729597091675,
      "logits/rejected": -1.2695637941360474,
      "logps/chosen": -0.09631801396608353,
      "logps/rejected": -0.08585619181394577,
      "loss": 0.8673,
      "rewards/accuracies": 0.3854166865348816,
      "rewards/chosen": -0.19263602793216705,
      "rewards/margins": -0.020923633128404617,
      "rewards/rejected": -0.17171238362789154,
      "step": 40
    },
    {
      "epoch": 0.492,
      "grad_norm": 0.9445565342903137,
      "learning_rate": 3.0266981749893154e-07,
      "logits/chosen": -1.3988057374954224,
      "logits/rejected": -1.2703980207443237,
      "logps/chosen": -0.09746824949979782,
      "logps/rejected": -0.0910995602607727,
      "loss": 0.8624,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.19493649899959564,
      "rewards/margins": -0.012737366370856762,
      "rewards/rejected": -0.1821991205215454,
      "step": 41
    },
    {
      "epoch": 0.504,
      "grad_norm": 1.080769419670105,
      "learning_rate": 2.922502050804623e-07,
      "logits/chosen": -1.4094312191009521,
      "logits/rejected": -1.247127890586853,
      "logps/chosen": -0.09884171932935715,
      "logps/rejected": -0.08831708878278732,
      "loss": 0.8677,
      "rewards/accuracies": 0.3958333730697632,
      "rewards/chosen": -0.1976834386587143,
      "rewards/margins": -0.02104926109313965,
      "rewards/rejected": -0.17663417756557465,
      "step": 42
    },
    {
      "epoch": 0.516,
      "grad_norm": 0.9974801540374756,
      "learning_rate": 2.8175445493671966e-07,
      "logits/chosen": -1.3815157413482666,
      "logits/rejected": -1.2270005941390991,
      "logps/chosen": -0.10922063887119293,
      "logps/rejected": -0.0900418609380722,
      "loss": 0.8809,
      "rewards/accuracies": 0.4583333730697632,
      "rewards/chosen": -0.21844127774238586,
      "rewards/margins": -0.03835754841566086,
      "rewards/rejected": -0.1800837218761444,
      "step": 43
    },
    {
      "epoch": 0.528,
      "grad_norm": 0.9864545464515686,
      "learning_rate": 2.712014811188773e-07,
      "logits/chosen": -1.3654570579528809,
      "logits/rejected": -1.2366658449172974,
      "logps/chosen": -0.1124953106045723,
      "logps/rejected": -0.0964164212346077,
      "loss": 0.8745,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.2249906212091446,
      "rewards/margins": -0.0321577824652195,
      "rewards/rejected": -0.1928328424692154,
      "step": 44
    },
    {
      "epoch": 0.54,
      "grad_norm": 0.9367190599441528,
      "learning_rate": 2.606103007990371e-07,
      "logits/chosen": -1.3880029916763306,
      "logits/rejected": -1.2659260034561157,
      "logps/chosen": -0.09874889254570007,
      "logps/rejected": -0.09255427867174149,
      "loss": 0.8628,
      "rewards/accuracies": 0.4062500298023224,
      "rewards/chosen": -0.19749778509140015,
      "rewards/margins": -0.012389198876917362,
      "rewards/rejected": -0.18510855734348297,
      "step": 45
    },
    {
      "epoch": 0.552,
      "grad_norm": 0.9902623891830444,
      "learning_rate": 2.5e-07,
      "logits/chosen": -1.3836193084716797,
      "logits/rejected": -1.2308557033538818,
      "logps/chosen": -0.09668231755495071,
      "logps/rejected": -0.08627666532993317,
      "loss": 0.867,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -0.19336463510990143,
      "rewards/margins": -0.020811304450035095,
      "rewards/rejected": -0.17255333065986633,
      "step": 46
    },
    {
      "epoch": 0.564,
      "grad_norm": 1.0337759256362915,
      "learning_rate": 2.3938969920096296e-07,
      "logits/chosen": -1.3623703718185425,
      "logits/rejected": -1.2246556282043457,
      "logps/chosen": -0.11106079071760178,
      "logps/rejected": -0.09072640538215637,
      "loss": 0.8814,
      "rewards/accuracies": 0.4479166865348816,
      "rewards/chosen": -0.22212158143520355,
      "rewards/margins": -0.040668785572052,
      "rewards/rejected": -0.18145281076431274,
      "step": 47
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.984899640083313,
      "learning_rate": 2.2879851888112278e-07,
      "logits/chosen": -1.3421802520751953,
      "logits/rejected": -1.198030710220337,
      "logps/chosen": -0.10126922279596329,
      "logps/rejected": -0.09068246185779572,
      "loss": 0.8687,
      "rewards/accuracies": 0.3958333730697632,
      "rewards/chosen": -0.20253844559192657,
      "rewards/margins": -0.021173518151044846,
      "rewards/rejected": -0.18136492371559143,
      "step": 48
    },
    {
      "epoch": 0.588,
      "grad_norm": 1.0588122606277466,
      "learning_rate": 2.182455450632803e-07,
      "logits/chosen": -1.3651273250579834,
      "logits/rejected": -1.2209078073501587,
      "logps/chosen": -0.10214084386825562,
      "logps/rejected": -0.09154266119003296,
      "loss": 0.8675,
      "rewards/accuracies": 0.4166666865348816,
      "rewards/chosen": -0.20428168773651123,
      "rewards/margins": -0.02119637280702591,
      "rewards/rejected": -0.18308532238006592,
      "step": 49
    },
    {
      "epoch": 0.6,
      "grad_norm": 1.0050568580627441,
      "learning_rate": 2.0774979491953776e-07,
      "logits/chosen": -1.3634512424468994,
      "logits/rejected": -1.1983171701431274,
      "logps/chosen": -0.09622293710708618,
      "logps/rejected": -0.09168636053800583,
      "loss": 0.8605,
      "rewards/accuracies": 0.4479166865348816,
      "rewards/chosen": -0.19244587421417236,
      "rewards/margins": -0.009073152206838131,
      "rewards/rejected": -0.18337272107601166,
      "step": 50
    },
    {
      "epoch": 0.612,
      "grad_norm": 1.003779411315918,
      "learning_rate": 1.973301825010685e-07,
      "logits/chosen": -1.377637267112732,
      "logits/rejected": -1.2013237476348877,
      "logps/chosen": -0.09089501947164536,
      "logps/rejected": -0.09312086552381516,
      "loss": 0.8524,
      "rewards/accuracies": 0.5520833730697632,
      "rewards/chosen": -0.1817900389432907,
      "rewards/margins": 0.004451685585081577,
      "rewards/rejected": -0.1862417310476303,
      "step": 51
    },
    {
      "epoch": 0.624,
      "grad_norm": 1.0647156238555908,
      "learning_rate": 1.8700548465371873e-07,
      "logits/chosen": -1.3391690254211426,
      "logits/rejected": -1.182291030883789,
      "logps/chosen": -0.10368049144744873,
      "logps/rejected": -0.09800291061401367,
      "loss": 0.8624,
      "rewards/accuracies": 0.4895833134651184,
      "rewards/chosen": -0.20736098289489746,
      "rewards/margins": -0.011355183087289333,
      "rewards/rejected": -0.19600582122802734,
      "step": 52
    },
    {
      "epoch": 0.636,
      "grad_norm": 1.2042913436889648,
      "learning_rate": 1.767943071808624e-07,
      "logits/chosen": -1.3675341606140137,
      "logits/rejected": -1.2020319700241089,
      "logps/chosen": -0.10732070356607437,
      "logps/rejected": -0.09919053316116333,
      "loss": 0.871,
      "rewards/accuracies": 0.5416666865348816,
      "rewards/chosen": -0.21464140713214874,
      "rewards/margins": -0.01626037061214447,
      "rewards/rejected": -0.19838106632232666,
      "step": 53
    },
    {
      "epoch": 0.648,
      "grad_norm": 0.9795694351196289,
      "learning_rate": 1.667150513144856e-07,
      "logits/chosen": -1.3203659057617188,
      "logits/rejected": -1.1565968990325928,
      "logps/chosen": -0.09574580192565918,
      "logps/rejected": -0.09623640775680542,
      "loss": 0.8547,
      "rewards/accuracies": 0.5520833730697632,
      "rewards/chosen": -0.19149160385131836,
      "rewards/margins": 0.000981215387582779,
      "rewards/rejected": -0.19247281551361084,
      "step": 54
    },
    {
      "epoch": 0.66,
      "grad_norm": 1.0732730627059937,
      "learning_rate": 1.5678588055492286e-07,
      "logits/chosen": -1.2979214191436768,
      "logits/rejected": -1.150040626525879,
      "logps/chosen": -0.10067766904830933,
      "logps/rejected": -0.1018737256526947,
      "loss": 0.8545,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.20135533809661865,
      "rewards/margins": 0.0023921187967061996,
      "rewards/rejected": -0.2037474513053894,
      "step": 55
    },
    {
      "epoch": 0.672,
      "grad_norm": 0.9587694406509399,
      "learning_rate": 1.4702468793900186e-07,
      "logits/chosen": -1.3606462478637695,
      "logits/rejected": -1.1573679447174072,
      "logps/chosen": -0.09973961114883423,
      "logps/rejected": -0.10131655633449554,
      "loss": 0.8535,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.19947922229766846,
      "rewards/margins": 0.003153874073177576,
      "rewards/rejected": -0.2026331126689911,
      "step": 56
    },
    {
      "epoch": 0.684,
      "grad_norm": 1.0398699045181274,
      "learning_rate": 1.3744906379558164e-07,
      "logits/chosen": -1.3375906944274902,
      "logits/rejected": -1.1461068391799927,
      "logps/chosen": -0.10326816886663437,
      "logps/rejected": -0.10188017785549164,
      "loss": 0.857,
      "rewards/accuracies": 0.4687500298023224,
      "rewards/chosen": -0.20653633773326874,
      "rewards/margins": -0.002775975503027439,
      "rewards/rejected": -0.20376035571098328,
      "step": 57
    },
    {
      "epoch": 0.696,
      "grad_norm": 1.0405802726745605,
      "learning_rate": 1.280762640465914e-07,
      "logits/chosen": -1.3368816375732422,
      "logits/rejected": -1.1643409729003906,
      "logps/chosen": -0.09207028895616531,
      "logps/rejected": -0.09347332268953323,
      "loss": 0.8534,
      "rewards/accuracies": 0.5208333134651184,
      "rewards/chosen": -0.18414057791233063,
      "rewards/margins": 0.002806063275784254,
      "rewards/rejected": -0.18694664537906647,
      "step": 58
    },
    {
      "epoch": 0.708,
      "grad_norm": 1.0253372192382812,
      "learning_rate": 1.189231791106921e-07,
      "logits/chosen": -1.2978026866912842,
      "logits/rejected": -1.13652765750885,
      "logps/chosen": -0.10593652725219727,
      "logps/rejected": -0.10165742039680481,
      "loss": 0.8609,
      "rewards/accuracies": 0.479166716337204,
      "rewards/chosen": -0.21187305450439453,
      "rewards/margins": -0.008558189496397972,
      "rewards/rejected": -0.20331484079360962,
      "step": 59
    },
    {
      "epoch": 0.72,
      "grad_norm": 1.2529534101486206,
      "learning_rate": 1.1000630346560116e-07,
      "logits/chosen": -1.3010480403900146,
      "logits/rejected": -1.133022665977478,
      "logps/chosen": -0.11126932501792908,
      "logps/rejected": -0.09786901623010635,
      "loss": 0.8738,
      "rewards/accuracies": 0.4895833432674408,
      "rewards/chosen": -0.22253865003585815,
      "rewards/margins": -0.026800617575645447,
      "rewards/rejected": -0.1957380324602127,
      "step": 60
    },
    {
      "epoch": 0.732,
      "grad_norm": 0.9133132696151733,
      "learning_rate": 1.0134170592392836e-07,
      "logits/chosen": -1.3394014835357666,
      "logits/rejected": -1.1518943309783936,
      "logps/chosen": -0.09960196912288666,
      "logps/rejected": -0.10244160890579224,
      "loss": 0.8521,
      "rewards/accuracies": 0.5104166865348816,
      "rewards/chosen": -0.19920393824577332,
      "rewards/margins": 0.005679287016391754,
      "rewards/rejected": -0.20488321781158447,
      "step": 61
    },
    {
      "epoch": 0.744,
      "grad_norm": 1.0934334993362427,
      "learning_rate": 9.29450006760894e-08,
      "logits/chosen": -1.3294323682785034,
      "logits/rejected": -1.1431366205215454,
      "logps/chosen": -0.10188900679349899,
      "logps/rejected": -0.09653942286968231,
      "loss": 0.8616,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.20377801358699799,
      "rewards/margins": -0.010699168778955936,
      "rewards/rejected": -0.19307884573936462,
      "step": 62
    },
    {
      "epoch": 0.756,
      "grad_norm": 0.9888376593589783,
      "learning_rate": 8.483131915247967e-08,
      "logits/chosen": -1.3347070217132568,
      "logits/rejected": -1.167306661605835,
      "logps/chosen": -0.10506478697061539,
      "logps/rejected": -0.1007775291800499,
      "loss": 0.8603,
      "rewards/accuracies": 0.5000000596046448,
      "rewards/chosen": -0.21012957394123077,
      "rewards/margins": -0.008574524894356728,
      "rewards/rejected": -0.2015550583600998,
      "step": 63
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.9486240744590759,
      "learning_rate": 7.701528275561347e-08,
      "logits/chosen": -1.3588067293167114,
      "logits/rejected": -1.1604478359222412,
      "logps/chosen": -0.0977708101272583,
      "logps/rejected": -0.09753303229808807,
      "loss": 0.8555,
      "rewards/accuracies": 0.5312500596046448,
      "rewards/chosen": -0.1955416202545166,
      "rewards/margins": -0.0004755451809614897,
      "rewards/rejected": -0.19506606459617615,
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.033793568611145, | |
| "learning_rate": 6.951097651136889e-08, | |
| "logits/chosen": -1.3951979875564575, | |
| "logits/rejected": -1.125361680984497, | |
| "logps/chosen": -0.10307514667510986, | |
| "logps/rejected": -0.1052648052573204, | |
| "loss": 0.8525, | |
| "rewards/accuracies": 0.4895833432674408, | |
| "rewards/chosen": -0.20615029335021973, | |
| "rewards/margins": 0.0043793064542114735, | |
| "rewards/rejected": -0.2105296105146408, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.792, | |
| "grad_norm": 1.0308159589767456, | |
| "learning_rate": 6.233192368681889e-08, | |
| "logits/chosen": -1.3253390789031982, | |
| "logits/rejected": -1.1783477067947388, | |
| "logps/chosen": -0.09217476844787598, | |
| "logps/rejected": -0.09416632354259491, | |
| "loss": 0.8527, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.18434953689575195, | |
| "rewards/margins": 0.003983109723776579, | |
| "rewards/rejected": -0.18833264708518982, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.804, | |
| "grad_norm": 1.1081663370132446, | |
| "learning_rate": 5.5491061420390174e-08, | |
| "logits/chosen": -1.2740880250930786, | |
| "logits/rejected": -1.152024269104004, | |
| "logps/chosen": -0.10923092067241669, | |
| "logps/rejected": -0.10176312923431396, | |
| "loss": 0.8642, | |
| "rewards/accuracies": 0.5104166865348816, | |
| "rewards/chosen": -0.21846184134483337, | |
| "rewards/margins": -0.01493558008223772, | |
| "rewards/rejected": -0.20352625846862793, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.816, | |
| "grad_norm": 1.0150699615478516, | |
| "learning_rate": 4.900071740826489e-08, | |
| "logits/chosen": -1.348282814025879, | |
| "logits/rejected": -1.1383142471313477, | |
| "logps/chosen": -0.10064545273780823, | |
| "logps/rejected": -0.10860613733530045, | |
| "loss": 0.8459, | |
| "rewards/accuracies": 0.6041667461395264, | |
| "rewards/chosen": -0.20129090547561646, | |
| "rewards/margins": 0.01592138595879078, | |
| "rewards/rejected": -0.2172122746706009, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.828, | |
| "grad_norm": 1.123404860496521, | |
| "learning_rate": 4.287258768903948e-08, | |
| "logits/chosen": -1.3442084789276123, | |
| "logits/rejected": -1.146917462348938, | |
| "logps/chosen": -0.10861781984567642, | |
| "logps/rejected": -0.10071661323308945, | |
| "loss": 0.8688, | |
| "rewards/accuracies": 0.5833333134651184, | |
| "rewards/chosen": -0.21723563969135284, | |
| "rewards/margins": -0.015802428126335144, | |
| "rewards/rejected": -0.2014332264661789, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.9981204867362976, | |
| "learning_rate": 3.7117715566672176e-08, | |
| "logits/chosen": -1.3524047136306763, | |
| "logits/rejected": -1.1452308893203735, | |
| "logps/chosen": -0.10508691519498825, | |
| "logps/rejected": -0.10403452813625336, | |
| "loss": 0.8568, | |
| "rewards/accuracies": 0.5729166865348816, | |
| "rewards/chosen": -0.2101738303899765, | |
| "rewards/margins": -0.0021047808695584536, | |
| "rewards/rejected": -0.2080690562725067, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.852, | |
| "grad_norm": 1.0365066528320312, | |
| "learning_rate": 3.174647170970296e-08, | |
| "logits/chosen": -1.3592528104782104, | |
| "logits/rejected": -1.1138660907745361, | |
| "logps/chosen": -0.10330555588006973, | |
| "logps/rejected": -0.1064281240105629, | |
| "loss": 0.8524, | |
| "rewards/accuracies": 0.5729166865348816, | |
| "rewards/chosen": -0.20661111176013947, | |
| "rewards/margins": 0.006245152093470097, | |
| "rewards/rejected": -0.2128562480211258, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.864, | |
| "grad_norm": 1.076464295387268, | |
| "learning_rate": 2.6768535462607905e-08, | |
| "logits/chosen": -1.3058511018753052, | |
| "logits/rejected": -1.126082420349121, | |
| "logps/chosen": -0.10569944232702255, | |
| "logps/rejected": -0.11077728122472763, | |
| "loss": 0.8494, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.2113988846540451, | |
| "rewards/margins": 0.01015565823763609, | |
| "rewards/rejected": -0.22155456244945526, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.876, | |
| "grad_norm": 0.9264240264892578, | |
| "learning_rate": 2.2192877402966048e-08, | |
| "logits/chosen": -1.3490333557128906, | |
| "logits/rejected": -1.1134393215179443, | |
| "logps/chosen": -0.10448520630598068, | |
| "logps/rejected": -0.11283887922763824, | |
| "loss": 0.8457, | |
| "rewards/accuracies": 0.6145833730697632, | |
| "rewards/chosen": -0.20897041261196136, | |
| "rewards/margins": 0.016707373782992363, | |
| "rewards/rejected": -0.2256777584552765, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.888, | |
| "grad_norm": 1.067218542098999, | |
| "learning_rate": 1.8027743175872662e-08, | |
| "logits/chosen": -1.345249891281128, | |
| "logits/rejected": -1.1063634157180786, | |
| "logps/chosen": -0.11241614818572998, | |
| "logps/rejected": -0.11045221984386444, | |
| "loss": 0.8579, | |
| "rewards/accuracies": 0.5104166865348816, | |
| "rewards/chosen": -0.22483229637145996, | |
| "rewards/margins": -0.003927857149392366, | |
| "rewards/rejected": -0.22090443968772888, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.0710501670837402, | |
| "learning_rate": 1.4280638634728948e-08, | |
| "logits/chosen": -1.3244132995605469, | |
| "logits/rejected": -1.1485953330993652, | |
| "logps/chosen": -0.11838357150554657, | |
| "logps/rejected": -0.10926854610443115, | |
| "loss": 0.8683, | |
| "rewards/accuracies": 0.47916674613952637, | |
| "rewards/chosen": -0.23676714301109314, | |
| "rewards/margins": -0.018230034038424492, | |
| "rewards/rejected": -0.2185370922088623, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.912, | |
| "grad_norm": 1.1065670251846313, | |
| "learning_rate": 1.0958316315187289e-08, | |
| "logits/chosen": -1.317086100578308, | |
| "logits/rejected": -1.1413421630859375, | |
| "logps/chosen": -0.10257872194051743, | |
| "logps/rejected": -0.10372138023376465, | |
| "loss": 0.8542, | |
| "rewards/accuracies": 0.5416666269302368, | |
| "rewards/chosen": -0.20515744388103485, | |
| "rewards/margins": 0.0022853193804621696, | |
| "rewards/rejected": -0.2074427604675293, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.924, | |
| "grad_norm": 1.0825769901275635, | |
| "learning_rate": 8.066763266625282e-09, | |
| "logits/chosen": -1.3573386669158936, | |
| "logits/rejected": -1.109717607498169, | |
| "logps/chosen": -0.10758916288614273, | |
| "logps/rejected": -0.10599493980407715, | |
| "loss": 0.8597, | |
| "rewards/accuracies": 0.6250000596046448, | |
| "rewards/chosen": -0.21517832577228546, | |
| "rewards/margins": -0.0031884238123893738, | |
| "rewards/rejected": -0.2119898796081543, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.936, | |
| "grad_norm": 0.9156083464622498, | |
| "learning_rate": 5.611190263078463e-09, | |
| "logits/chosen": -1.309991717338562, | |
| "logits/rejected": -1.1210401058197021, | |
| "logps/chosen": -0.0973024070262909, | |
| "logps/rejected": -0.09729278087615967, | |
| "loss": 0.8551, | |
| "rewards/accuracies": 0.4583333432674408, | |
| "rewards/chosen": -0.1946048140525818, | |
| "rewards/margins": -1.9263941794633865e-05, | |
| "rewards/rejected": -0.19458556175231934, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.948, | |
| "grad_norm": 0.9994163513183594, | |
| "learning_rate": 3.5960224130728858e-09, | |
| "logits/chosen": -1.3023028373718262, | |
| "logits/rejected": -1.1124149560928345, | |
| "logps/chosen": -0.09809858351945877, | |
| "logps/rejected": -0.1080770194530487, | |
| "loss": 0.8436, | |
| "rewards/accuracies": 0.6354166865348816, | |
| "rewards/chosen": -0.19619716703891754, | |
| "rewards/margins": 0.01995689421892166, | |
| "rewards/rejected": -0.2161540389060974, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.9938739538192749, | |
| "learning_rate": 2.0248911852807917e-09, | |
| "logits/chosen": -1.3484057188034058, | |
| "logits/rejected": -1.0871906280517578, | |
| "logps/chosen": -0.11684219539165497, | |
| "logps/rejected": -0.11286689341068268, | |
| "loss": 0.8601, | |
| "rewards/accuracies": 0.5104166865348816, | |
| "rewards/chosen": -0.23368439078330994, | |
| "rewards/margins": -0.007950600236654282, | |
| "rewards/rejected": -0.22573378682136536, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.972, | |
| "grad_norm": 0.9481876492500305, | |
| "learning_rate": 9.006278643683696e-10, | |
| "logits/chosen": -1.3169337511062622, | |
| "logits/rejected": -1.1376291513442993, | |
| "logps/chosen": -0.09936561435461044, | |
| "logps/rejected": -0.10312428325414658, | |
| "loss": 0.8508, | |
| "rewards/accuracies": 0.5833333730697632, | |
| "rewards/chosen": -0.1987312287092209, | |
| "rewards/margins": 0.007517362013459206, | |
| "rewards/rejected": -0.20624856650829315, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.984, | |
| "grad_norm": 1.0620635747909546, | |
| "learning_rate": 2.2525844882964606e-10, | |
| "logits/chosen": -1.3467659950256348, | |
| "logits/rejected": -1.1462361812591553, | |
| "logps/chosen": -0.10522940754890442, | |
| "logps/rejected": -0.10153805464506149, | |
| "loss": 0.8594, | |
| "rewards/accuracies": 0.4687500298023224, | |
| "rewards/chosen": -0.21045881509780884, | |
| "rewards/margins": -0.007382689975202084, | |
| "rewards/rejected": -0.20307610929012299, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.996, | |
| "grad_norm": 1.0221400260925293, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -1.313194751739502, | |
| "logits/rejected": -1.164141297340393, | |
| "logps/chosen": -0.10121379047632217, | |
| "logps/rejected": -0.1018705815076828, | |
| "loss": 0.8549, | |
| "rewards/accuracies": 0.5416666865348816, | |
| "rewards/chosen": -0.20242758095264435, | |
| "rewards/margins": 0.0013135506305843592, | |
| "rewards/rejected": -0.2037411630153656, | |
| "step": 83 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 83, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 305651334512640.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |