| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.997458513978173, | |
| "eval_steps": 200, | |
| "global_step": 2508, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02989983555090447, | |
| "grad_norm": 78.5, | |
| "learning_rate": 4.780876494023904e-07, | |
| "logits/chosen": -0.8346603512763977, | |
| "logits/rejected": -0.5625396966934204, | |
| "logps/chosen": -311.11248779296875, | |
| "logps/rejected": -290.71624755859375, | |
| "loss": 0.6974, | |
| "rewards/accuracies": 0.3199999928474426, | |
| "rewards/chosen": -0.005879516713321209, | |
| "rewards/margins": -0.0028140258509665728, | |
| "rewards/rejected": -0.003072815015912056, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.05979967110180894, | |
| "grad_norm": 112.5, | |
| "learning_rate": 9.760956175298805e-07, | |
| "logits/chosen": -0.8477816581726074, | |
| "logits/rejected": -0.5839244723320007, | |
| "logps/chosen": -341.1449890136719, | |
| "logps/rejected": -303.2749938964844, | |
| "loss": 0.6939, | |
| "rewards/accuracies": 0.33500000834465027, | |
| "rewards/chosen": -0.01889648474752903, | |
| "rewards/margins": 0.0013772583333775401, | |
| "rewards/rejected": -0.020271606743335724, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08969950665271341, | |
| "grad_norm": 89.5, | |
| "learning_rate": 1.4741035856573708e-06, | |
| "logits/chosen": -0.7348077893257141, | |
| "logits/rejected": -0.419241338968277, | |
| "logps/chosen": -311.4237365722656, | |
| "logps/rejected": -284.5274963378906, | |
| "loss": 0.7, | |
| "rewards/accuracies": 0.28999999165534973, | |
| "rewards/chosen": -0.020579833537340164, | |
| "rewards/margins": -0.008827819488942623, | |
| "rewards/rejected": -0.011761474423110485, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.11959934220361788, | |
| "grad_norm": 89.5, | |
| "learning_rate": 1.9721115537848607e-06, | |
| "logits/chosen": -0.9120362997055054, | |
| "logits/rejected": -0.566675066947937, | |
| "logps/chosen": -322.989990234375, | |
| "logps/rejected": -276.8037414550781, | |
| "loss": 0.6868, | |
| "rewards/accuracies": 0.3675000071525574, | |
| "rewards/chosen": -0.027477417141199112, | |
| "rewards/margins": 0.018669739365577698, | |
| "rewards/rejected": -0.04612060636281967, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14949917775452234, | |
| "grad_norm": 83.0, | |
| "learning_rate": 2.470119521912351e-06, | |
| "logits/chosen": -0.8410671353340149, | |
| "logits/rejected": -0.43034911155700684, | |
| "logps/chosen": -297.4024963378906, | |
| "logps/rejected": -304.4224853515625, | |
| "loss": 0.6832, | |
| "rewards/accuracies": 0.36000001430511475, | |
| "rewards/chosen": -0.05832824856042862, | |
| "rewards/margins": 0.02584075927734375, | |
| "rewards/rejected": -0.08419036865234375, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.17939901330542682, | |
| "grad_norm": 106.5, | |
| "learning_rate": 2.968127490039841e-06, | |
| "logits/chosen": -0.9279866814613342, | |
| "logits/rejected": -0.6811022758483887, | |
| "logps/chosen": -312.67498779296875, | |
| "logps/rejected": -285.7799987792969, | |
| "loss": 0.6709, | |
| "rewards/accuracies": 0.49000000953674316, | |
| "rewards/chosen": -0.07547790557146072, | |
| "rewards/margins": 0.056133728474378586, | |
| "rewards/rejected": -0.1316046118736267, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2092988488563313, | |
| "grad_norm": 96.0, | |
| "learning_rate": 3.466135458167331e-06, | |
| "logits/chosen": -0.8703573346138, | |
| "logits/rejected": -0.5601403713226318, | |
| "logps/chosen": -323.947509765625, | |
| "logps/rejected": -292.8074951171875, | |
| "loss": 0.6696, | |
| "rewards/accuracies": 0.4950000047683716, | |
| "rewards/chosen": -0.11684814095497131, | |
| "rewards/margins": 0.06319641321897507, | |
| "rewards/rejected": -0.1800549328327179, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.23919868440723577, | |
| "grad_norm": 99.0, | |
| "learning_rate": 3.9641434262948205e-06, | |
| "logits/chosen": -0.9258654713630676, | |
| "logits/rejected": -0.5686477422714233, | |
| "logps/chosen": -328.7449951171875, | |
| "logps/rejected": -316.5574951171875, | |
| "loss": 0.6579, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1883123815059662, | |
| "rewards/margins": 0.09867187589406967, | |
| "rewards/rejected": -0.28693297505378723, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23919868440723577, | |
| "eval_logits/chosen": -0.9216321706771851, | |
| "eval_logits/rejected": -0.7277408838272095, | |
| "eval_logps/chosen": -320.7849426269531, | |
| "eval_logps/rejected": -293.8709716796875, | |
| "eval_loss": 0.6465986371040344, | |
| "eval_rewards/accuracies": 0.560387909412384, | |
| "eval_rewards/chosen": -0.19119606912136078, | |
| "eval_rewards/margins": 0.1261032223701477, | |
| "eval_rewards/rejected": -0.31729716062545776, | |
| "eval_runtime": 877.9315, | |
| "eval_samples_per_second": 1.694, | |
| "eval_steps_per_second": 0.212, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2690985199581402, | |
| "grad_norm": 87.0, | |
| "learning_rate": 4.462151394422311e-06, | |
| "logits/chosen": -0.8007558584213257, | |
| "logits/rejected": -0.505867600440979, | |
| "logps/chosen": -320.7512512207031, | |
| "logps/rejected": -311.8299865722656, | |
| "loss": 0.6444, | |
| "rewards/accuracies": 0.5649999976158142, | |
| "rewards/chosen": -0.2540551722049713, | |
| "rewards/margins": 0.14147095382213593, | |
| "rewards/rejected": -0.3954962193965912, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.2989983555090447, | |
| "grad_norm": 96.5, | |
| "learning_rate": 4.960159362549802e-06, | |
| "logits/chosen": -0.9090196490287781, | |
| "logits/rejected": -0.6456773281097412, | |
| "logps/chosen": -323.7200012207031, | |
| "logps/rejected": -295.2149963378906, | |
| "loss": 0.6255, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.2805468738079071, | |
| "rewards/margins": 0.19930054247379303, | |
| "rewards/rejected": -0.47991272807121277, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.32889819105994916, | |
| "grad_norm": 91.0, | |
| "learning_rate": 4.9490474080638015e-06, | |
| "logits/chosen": -0.9534767270088196, | |
| "logits/rejected": -0.6329247951507568, | |
| "logps/chosen": -319.1549987792969, | |
| "logps/rejected": -283.88751220703125, | |
| "loss": 0.6192, | |
| "rewards/accuracies": 0.5924999713897705, | |
| "rewards/chosen": -0.29086607694625854, | |
| "rewards/margins": 0.23339904844760895, | |
| "rewards/rejected": -0.5240704417228699, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.35879802661085364, | |
| "grad_norm": 70.5, | |
| "learning_rate": 4.8936641559592385e-06, | |
| "logits/chosen": -0.9436456561088562, | |
| "logits/rejected": -0.7789434790611267, | |
| "logps/chosen": -349.5050048828125, | |
| "logps/rejected": -310.48748779296875, | |
| "loss": 0.627, | |
| "rewards/accuracies": 0.6349999904632568, | |
| "rewards/chosen": -0.30020782351493835, | |
| "rewards/margins": 0.23243407905101776, | |
| "rewards/rejected": -0.532727062702179, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3886978621617581, | |
| "grad_norm": 101.0, | |
| "learning_rate": 4.838280903854675e-06, | |
| "logits/chosen": -0.9607565402984619, | |
| "logits/rejected": -0.7166936993598938, | |
| "logps/chosen": -317.0874938964844, | |
| "logps/rejected": -289.0824890136719, | |
| "loss": 0.5906, | |
| "rewards/accuracies": 0.6524999737739563, | |
| "rewards/chosen": -0.4176098704338074, | |
| "rewards/margins": 0.3300067186355591, | |
| "rewards/rejected": -0.7473974823951721, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.4185976977126626, | |
| "grad_norm": 94.0, | |
| "learning_rate": 4.782897651750112e-06, | |
| "logits/chosen": -0.9818115234375, | |
| "logits/rejected": -0.6833120584487915, | |
| "logps/chosen": -321.1875, | |
| "logps/rejected": -316.58624267578125, | |
| "loss": 0.577, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.4978076219558716, | |
| "rewards/margins": 0.39054566621780396, | |
| "rewards/rejected": -0.8884375095367432, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.44849753326356706, | |
| "grad_norm": 83.5, | |
| "learning_rate": 4.727514399645548e-06, | |
| "logits/chosen": -1.0211011171340942, | |
| "logits/rejected": -0.7218142747879028, | |
| "logps/chosen": -307.9674987792969, | |
| "logps/rejected": -288.7850036621094, | |
| "loss": 0.5544, | |
| "rewards/accuracies": 0.6974999904632568, | |
| "rewards/chosen": -0.4097009301185608, | |
| "rewards/margins": 0.4377111792564392, | |
| "rewards/rejected": -0.8475390672683716, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.47839736881447154, | |
| "grad_norm": 77.5, | |
| "learning_rate": 4.672131147540984e-06, | |
| "logits/chosen": -0.9680676460266113, | |
| "logits/rejected": -0.7582107782363892, | |
| "logps/chosen": -337.9375, | |
| "logps/rejected": -313.7749938964844, | |
| "loss": 0.5977, | |
| "rewards/accuracies": 0.6549999713897705, | |
| "rewards/chosen": -0.5489477515220642, | |
| "rewards/margins": 0.35999757051467896, | |
| "rewards/rejected": -0.9089636206626892, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.47839736881447154, | |
| "eval_logits/chosen": -1.041106939315796, | |
| "eval_logits/rejected": -0.8698605895042419, | |
| "eval_logps/chosen": -323.7284851074219, | |
| "eval_logps/rejected": -299.6156005859375, | |
| "eval_loss": 0.5722406506538391, | |
| "eval_rewards/accuracies": 0.6610022783279419, | |
| "eval_rewards/chosen": -0.4932539761066437, | |
| "eval_rewards/margins": 0.40423059463500977, | |
| "eval_rewards/rejected": -0.8973480463027954, | |
| "eval_runtime": 876.344, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.508297204365376, | |
| "grad_norm": 84.0, | |
| "learning_rate": 4.61674789543642e-06, | |
| "logits/chosen": -1.2390661239624023, | |
| "logits/rejected": -0.9836773872375488, | |
| "logps/chosen": -328.1875, | |
| "logps/rejected": -317.32501220703125, | |
| "loss": 0.5527, | |
| "rewards/accuracies": 0.6675000190734863, | |
| "rewards/chosen": -0.6254773139953613, | |
| "rewards/margins": 0.5287072658538818, | |
| "rewards/rejected": -1.153835415840149, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.5381970399162804, | |
| "grad_norm": 93.0, | |
| "learning_rate": 4.561364643331857e-06, | |
| "logits/chosen": -1.0737494230270386, | |
| "logits/rejected": -0.8683199882507324, | |
| "logps/chosen": -316.00250244140625, | |
| "logps/rejected": -295.9649963378906, | |
| "loss": 0.5736, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": -0.539447009563446, | |
| "rewards/margins": 0.46495360136032104, | |
| "rewards/rejected": -1.0048657655715942, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5680968754671849, | |
| "grad_norm": 73.5, | |
| "learning_rate": 4.505981391227293e-06, | |
| "logits/chosen": -1.052968144416809, | |
| "logits/rejected": -0.7523078322410583, | |
| "logps/chosen": -318.50250244140625, | |
| "logps/rejected": -313.8175048828125, | |
| "loss": 0.5422, | |
| "rewards/accuracies": 0.7149999737739563, | |
| "rewards/chosen": -0.5196704268455505, | |
| "rewards/margins": 0.5570727586746216, | |
| "rewards/rejected": -1.0764819383621216, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5979967110180894, | |
| "grad_norm": 70.0, | |
| "learning_rate": 4.4505981391227295e-06, | |
| "logits/chosen": -1.1461485624313354, | |
| "logits/rejected": -0.9354357719421387, | |
| "logps/chosen": -324.4750061035156, | |
| "logps/rejected": -294.0775146484375, | |
| "loss": 0.5415, | |
| "rewards/accuracies": 0.7074999809265137, | |
| "rewards/chosen": -0.518980085849762, | |
| "rewards/margins": 0.5734081864356995, | |
| "rewards/rejected": -1.092441439628601, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.6278965465689939, | |
| "grad_norm": 84.0, | |
| "learning_rate": 4.395214887018166e-06, | |
| "logits/chosen": -1.091801404953003, | |
| "logits/rejected": -0.8006445169448853, | |
| "logps/chosen": -323.1724853515625, | |
| "logps/rejected": -294.4674987792969, | |
| "loss": 0.5646, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": -0.672253429889679, | |
| "rewards/margins": 0.5069983005523682, | |
| "rewards/rejected": -1.1792798042297363, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.6577963821198983, | |
| "grad_norm": 95.0, | |
| "learning_rate": 4.339831634913603e-06, | |
| "logits/chosen": -1.220596194267273, | |
| "logits/rejected": -0.9236291646957397, | |
| "logps/chosen": -316.7950134277344, | |
| "logps/rejected": -302.0824890136719, | |
| "loss": 0.5178, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.7468109130859375, | |
| "rewards/margins": 0.6105853319168091, | |
| "rewards/rejected": -1.3566796779632568, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6876962176708028, | |
| "grad_norm": 100.0, | |
| "learning_rate": 4.284448382809039e-06, | |
| "logits/chosen": -1.0421770811080933, | |
| "logits/rejected": -0.7285050749778748, | |
| "logps/chosen": -308.42498779296875, | |
| "logps/rejected": -269.7037353515625, | |
| "loss": 0.5448, | |
| "rewards/accuracies": 0.6850000023841858, | |
| "rewards/chosen": -0.7317401170730591, | |
| "rewards/margins": 0.5794018507003784, | |
| "rewards/rejected": -1.3115381002426147, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.7175960532217073, | |
| "grad_norm": 97.5, | |
| "learning_rate": 4.229065130704476e-06, | |
| "logits/chosen": -1.1298235654830933, | |
| "logits/rejected": -0.7811802625656128, | |
| "logps/chosen": -322.0574951171875, | |
| "logps/rejected": -309.9750061035156, | |
| "loss": 0.5292, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.590954601764679, | |
| "rewards/margins": 0.6085253953933716, | |
| "rewards/rejected": -1.1989331245422363, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7175960532217073, | |
| "eval_logits/chosen": -1.078187346458435, | |
| "eval_logits/rejected": -0.9206746220588684, | |
| "eval_logps/chosen": -324.5967712402344, | |
| "eval_logps/rejected": -301.7204284667969, | |
| "eval_loss": 0.5492891669273376, | |
| "eval_rewards/accuracies": 0.6757872104644775, | |
| "eval_rewards/chosen": -0.5633505582809448, | |
| "eval_rewards/margins": 0.5408346652984619, | |
| "eval_rewards/rejected": -1.1038333177566528, | |
| "eval_runtime": 876.4047, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.7474958887726117, | |
| "grad_norm": 87.5, | |
| "learning_rate": 4.173681878599912e-06, | |
| "logits/chosen": -1.1809699535369873, | |
| "logits/rejected": -0.8887664675712585, | |
| "logps/chosen": -303.6575012207031, | |
| "logps/rejected": -294.7774963378906, | |
| "loss": 0.5261, | |
| "rewards/accuracies": 0.7275000214576721, | |
| "rewards/chosen": -0.5871319770812988, | |
| "rewards/margins": 0.6293676495552063, | |
| "rewards/rejected": -1.2162939310073853, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.7773957243235162, | |
| "grad_norm": 99.5, | |
| "learning_rate": 4.118298626495348e-06, | |
| "logits/chosen": -1.1009465456008911, | |
| "logits/rejected": -0.9342904686927795, | |
| "logps/chosen": -338.12750244140625, | |
| "logps/rejected": -318.96624755859375, | |
| "loss": 0.5603, | |
| "rewards/accuracies": 0.6850000023841858, | |
| "rewards/chosen": -0.714611828327179, | |
| "rewards/margins": 0.6232568621635437, | |
| "rewards/rejected": -1.3377538919448853, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.8072955598744207, | |
| "grad_norm": 72.5, | |
| "learning_rate": 4.062915374390784e-06, | |
| "logits/chosen": -1.2523653507232666, | |
| "logits/rejected": -1.0046355724334717, | |
| "logps/chosen": -310.9049987792969, | |
| "logps/rejected": -297.67498779296875, | |
| "loss": 0.5135, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": -0.7437072992324829, | |
| "rewards/margins": 0.6859521269798279, | |
| "rewards/rejected": -1.4290771484375, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.8371953954253252, | |
| "grad_norm": 89.0, | |
| "learning_rate": 4.007532122286221e-06, | |
| "logits/chosen": -1.2401965856552124, | |
| "logits/rejected": -0.8460285663604736, | |
| "logps/chosen": -336.927490234375, | |
| "logps/rejected": -318.7799987792969, | |
| "loss": 0.5186, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.7741259932518005, | |
| "rewards/margins": 0.7083032131195068, | |
| "rewards/rejected": -1.4823095798492432, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.8670952309762296, | |
| "grad_norm": 78.0, | |
| "learning_rate": 3.9521488701816575e-06, | |
| "logits/chosen": -1.1703033447265625, | |
| "logits/rejected": -0.9548498392105103, | |
| "logps/chosen": -287.87249755859375, | |
| "logps/rejected": -300.864990234375, | |
| "loss": 0.5476, | |
| "rewards/accuracies": 0.6825000047683716, | |
| "rewards/chosen": -0.8389843702316284, | |
| "rewards/margins": 0.608197033405304, | |
| "rewards/rejected": -1.447534203529358, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.8969950665271341, | |
| "grad_norm": 100.5, | |
| "learning_rate": 3.896765618077094e-06, | |
| "logits/chosen": -1.1477763652801514, | |
| "logits/rejected": -0.9038227796554565, | |
| "logps/chosen": -338.31500244140625, | |
| "logps/rejected": -319.9649963378906, | |
| "loss": 0.5148, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.8131677508354187, | |
| "rewards/margins": 0.7464379668235779, | |
| "rewards/rejected": -1.559140682220459, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.9268949020780386, | |
| "grad_norm": 92.0, | |
| "learning_rate": 3.84138236597253e-06, | |
| "logits/chosen": -1.2342950105667114, | |
| "logits/rejected": -0.946718156337738, | |
| "logps/chosen": -331.1512451171875, | |
| "logps/rejected": -304.0249938964844, | |
| "loss": 0.528, | |
| "rewards/accuracies": 0.7149999737739563, | |
| "rewards/chosen": -0.9154602289199829, | |
| "rewards/margins": 0.6957080364227295, | |
| "rewards/rejected": -1.6108520030975342, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.9567947376289431, | |
| "grad_norm": 102.0, | |
| "learning_rate": 3.7859991138679664e-06, | |
| "logits/chosen": -1.0906939506530762, | |
| "logits/rejected": -0.9649511575698853, | |
| "logps/chosen": -338.5637512207031, | |
| "logps/rejected": -338.4674987792969, | |
| "loss": 0.5151, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": -0.859545886516571, | |
| "rewards/margins": 0.7704944014549255, | |
| "rewards/rejected": -1.630163550376892, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9567947376289431, | |
| "eval_logits/chosen": -1.1360965967178345, | |
| "eval_logits/rejected": -0.9822049736976624, | |
| "eval_logps/chosen": -326.69891357421875, | |
| "eval_logps/rejected": -305.0, | |
| "eval_loss": 0.5390191674232483, | |
| "eval_rewards/accuracies": 0.687980055809021, | |
| "eval_rewards/chosen": -0.7810032367706299, | |
| "eval_rewards/margins": 0.6442182064056396, | |
| "eval_rewards/rejected": -1.4252588748931885, | |
| "eval_runtime": 876.4063, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9866945731798475, | |
| "grad_norm": 84.5, | |
| "learning_rate": 3.730615861763403e-06, | |
| "logits/chosen": -1.2244549989700317, | |
| "logits/rejected": null, | |
| "logps/chosen": -334.5425109863281, | |
| "logps/rejected": -339.23748779296875, | |
| "loss": 0.5275, | |
| "rewards/accuracies": 0.7149999737739563, | |
| "rewards/chosen": -0.8379321098327637, | |
| "rewards/margins": 0.715624988079071, | |
| "rewards/rejected": -1.554010033607483, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.0155479144864703, | |
| "grad_norm": 57.25, | |
| "learning_rate": 3.675232609658839e-06, | |
| "logits/chosen": -1.2397924661636353, | |
| "logits/rejected": -1.030158281326294, | |
| "logps/chosen": -320.9093322753906, | |
| "logps/rejected": -305.8393859863281, | |
| "loss": 0.4669, | |
| "rewards/accuracies": 0.7487046718597412, | |
| "rewards/chosen": -0.7694060206413269, | |
| "rewards/margins": 0.8478080630302429, | |
| "rewards/rejected": -1.6172634363174438, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.045447750037375, | |
| "grad_norm": 67.5, | |
| "learning_rate": 3.6198493575542758e-06, | |
| "logits/chosen": -1.2220094203948975, | |
| "logits/rejected": -0.9582018852233887, | |
| "logps/chosen": -318.0262451171875, | |
| "logps/rejected": -297.5799865722656, | |
| "loss": 0.4691, | |
| "rewards/accuracies": 0.7724999785423279, | |
| "rewards/chosen": -0.7301892042160034, | |
| "rewards/margins": 0.9199609160423279, | |
| "rewards/rejected": -1.6502331495285034, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.0753475855882793, | |
| "grad_norm": 73.5, | |
| "learning_rate": 3.564466105449712e-06, | |
| "logits/chosen": -1.089396357536316, | |
| "logits/rejected": -0.8958370685577393, | |
| "logps/chosen": -317.61749267578125, | |
| "logps/rejected": -295.4825134277344, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.7574999928474426, | |
| "rewards/chosen": -0.8305737376213074, | |
| "rewards/margins": 0.8526538014411926, | |
| "rewards/rejected": -1.6829102039337158, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.1052474211391838, | |
| "grad_norm": 64.5, | |
| "learning_rate": 3.509082853345149e-06, | |
| "logits/chosen": -1.1403405666351318, | |
| "logits/rejected": -0.8662219047546387, | |
| "logps/chosen": -322.0574951171875, | |
| "logps/rejected": -323.2074890136719, | |
| "loss": 0.4641, | |
| "rewards/accuracies": 0.7649999856948853, | |
| "rewards/chosen": -0.6764746308326721, | |
| "rewards/margins": 0.8836804032325745, | |
| "rewards/rejected": -1.5600537061691284, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.1351472566900882, | |
| "grad_norm": 66.0, | |
| "learning_rate": 3.453699601240585e-06, | |
| "logits/chosen": -1.2375200986862183, | |
| "logits/rejected": -0.9549773931503296, | |
| "logps/chosen": -321.0874938964844, | |
| "logps/rejected": -306.6000061035156, | |
| "loss": 0.4201, | |
| "rewards/accuracies": 0.8224999904632568, | |
| "rewards/chosen": -0.7068628072738647, | |
| "rewards/margins": 1.0075805187225342, | |
| "rewards/rejected": -1.7146776914596558, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.1650470922409926, | |
| "grad_norm": 64.0, | |
| "learning_rate": 3.3983163491360217e-06, | |
| "logits/chosen": -1.1668496131896973, | |
| "logits/rejected": -0.8835460543632507, | |
| "logps/chosen": -320.69000244140625, | |
| "logps/rejected": -323.0425109863281, | |
| "loss": 0.459, | |
| "rewards/accuracies": 0.7825000286102295, | |
| "rewards/chosen": -0.7173047065734863, | |
| "rewards/margins": 0.9243432879447937, | |
| "rewards/rejected": -1.6417040824890137, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.1949469277918972, | |
| "grad_norm": 62.75, | |
| "learning_rate": 3.342933097031458e-06, | |
| "logits/chosen": -1.2166632413864136, | |
| "logits/rejected": -0.9624554514884949, | |
| "logps/chosen": -301.0849914550781, | |
| "logps/rejected": -304.3475036621094, | |
| "loss": 0.4656, | |
| "rewards/accuracies": 0.7850000262260437, | |
| "rewards/chosen": -0.7919347882270813, | |
| "rewards/margins": 0.9388867020606995, | |
| "rewards/rejected": -1.73046875, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.1949469277918972, | |
| "eval_logits/chosen": -1.160080075263977, | |
| "eval_logits/rejected": -1.0079379081726074, | |
| "eval_logps/chosen": -326.43280029296875, | |
| "eval_logps/rejected": -305.1102294921875, | |
| "eval_loss": 0.527574896812439, | |
| "eval_rewards/accuracies": 0.6892281174659729, | |
| "eval_rewards/chosen": -0.7565616369247437, | |
| "eval_rewards/margins": 0.6851438879966736, | |
| "eval_rewards/rejected": -1.4416320323944092, | |
| "eval_runtime": 876.3772, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.2248467633428017, | |
| "grad_norm": 84.0, | |
| "learning_rate": 3.2875498449268944e-06, | |
| "logits/chosen": -1.1776912212371826, | |
| "logits/rejected": -1.050445556640625, | |
| "logps/chosen": -343.0050048828125, | |
| "logps/rejected": -331.1875, | |
| "loss": 0.4213, | |
| "rewards/accuracies": 0.8050000071525574, | |
| "rewards/chosen": -0.6588146686553955, | |
| "rewards/margins": 1.0112402439117432, | |
| "rewards/rejected": -1.670253872871399, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.254746598893706, | |
| "grad_norm": 66.0, | |
| "learning_rate": 3.2321665928223306e-06, | |
| "logits/chosen": -1.2721245288848877, | |
| "logits/rejected": -0.9186769127845764, | |
| "logps/chosen": -316.4549865722656, | |
| "logps/rejected": -315.2925109863281, | |
| "loss": 0.4838, | |
| "rewards/accuracies": 0.7825000286102295, | |
| "rewards/chosen": -0.8342553973197937, | |
| "rewards/margins": 0.83197021484375, | |
| "rewards/rejected": -1.665708065032959, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.2846464344446105, | |
| "grad_norm": 62.75, | |
| "learning_rate": 3.176783340717767e-06, | |
| "logits/chosen": -1.1176886558532715, | |
| "logits/rejected": -0.9960334300994873, | |
| "logps/chosen": -328.32501220703125, | |
| "logps/rejected": -328.3450012207031, | |
| "loss": 0.4538, | |
| "rewards/accuracies": 0.7850000262260437, | |
| "rewards/chosen": -0.7273278832435608, | |
| "rewards/margins": 0.9573754668235779, | |
| "rewards/rejected": -1.684999942779541, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.314546269995515, | |
| "grad_norm": 84.5, | |
| "learning_rate": 3.1214000886132033e-06, | |
| "logits/chosen": -1.1655590534210205, | |
| "logits/rejected": -0.8922329545021057, | |
| "logps/chosen": -314.9700012207031, | |
| "logps/rejected": -301.5050048828125, | |
| "loss": 0.4483, | |
| "rewards/accuracies": 0.7850000262260437, | |
| "rewards/chosen": -0.6278771758079529, | |
| "rewards/margins": 0.9427502155303955, | |
| "rewards/rejected": -1.5707299709320068, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.3444461055464194, | |
| "grad_norm": 69.5, | |
| "learning_rate": 3.06601683650864e-06, | |
| "logits/chosen": -1.2217812538146973, | |
| "logits/rejected": -0.976731538772583, | |
| "logps/chosen": -324.7850036621094, | |
| "logps/rejected": -316.4599914550781, | |
| "loss": 0.4368, | |
| "rewards/accuracies": 0.8149999976158142, | |
| "rewards/chosen": -0.7704944014549255, | |
| "rewards/margins": 0.9598730206489563, | |
| "rewards/rejected": -1.7300487756729126, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.374345941097324, | |
| "grad_norm": 81.0, | |
| "learning_rate": 3.010633584404076e-06, | |
| "logits/chosen": -1.203802466392517, | |
| "logits/rejected": -0.9061872959136963, | |
| "logps/chosen": -330.4175109863281, | |
| "logps/rejected": -312.9987487792969, | |
| "loss": 0.4787, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.7830480933189392, | |
| "rewards/margins": 0.9129126071929932, | |
| "rewards/rejected": -1.6956127882003784, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.4042457766482284, | |
| "grad_norm": 118.0, | |
| "learning_rate": 2.955250332299513e-06, | |
| "logits/chosen": -1.1928298473358154, | |
| "logits/rejected": -0.8999917507171631, | |
| "logps/chosen": -320.2650146484375, | |
| "logps/rejected": -301.5299987792969, | |
| "loss": 0.4698, | |
| "rewards/accuracies": 0.7549999952316284, | |
| "rewards/chosen": -0.8731860518455505, | |
| "rewards/margins": 0.9074377417564392, | |
| "rewards/rejected": -1.7800854444503784, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.434145612199133, | |
| "grad_norm": 65.0, | |
| "learning_rate": 2.8998670801949493e-06, | |
| "logits/chosen": -1.1984894275665283, | |
| "logits/rejected": -0.9353277683258057, | |
| "logps/chosen": -317.625, | |
| "logps/rejected": -325.4075012207031, | |
| "loss": 0.4502, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -0.9375879168510437, | |
| "rewards/margins": 0.9699438214302063, | |
| "rewards/rejected": -1.9072656631469727, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.434145612199133, | |
| "eval_logits/chosen": -1.156473159790039, | |
| "eval_logits/rejected": -1.006028413772583, | |
| "eval_logps/chosen": -327.82794189453125, | |
| "eval_logps/rejected": -306.8521423339844, | |
| "eval_loss": 0.5231196284294128, | |
| "eval_rewards/accuracies": 0.6926843523979187, | |
| "eval_rewards/chosen": -0.8996713161468506, | |
| "eval_rewards/margins": 0.7130159735679626, | |
| "eval_rewards/rejected": -1.6129347085952759, | |
| "eval_runtime": 876.3506, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.4640454477500373, | |
| "grad_norm": 99.5, | |
| "learning_rate": 2.844483828090386e-06, | |
| "logits/chosen": -1.339633822441101, | |
| "logits/rejected": -1.035129427909851, | |
| "logps/chosen": -332.54998779296875, | |
| "logps/rejected": -319.13751220703125, | |
| "loss": 0.4421, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": -0.8549670577049255, | |
| "rewards/margins": 1.0162646770477295, | |
| "rewards/rejected": -1.8712304830551147, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 1.493945283300942, | |
| "grad_norm": 83.5, | |
| "learning_rate": 2.789100575985822e-06, | |
| "logits/chosen": -1.1476205587387085, | |
| "logits/rejected": -0.9250108599662781, | |
| "logps/chosen": -322.0050048828125, | |
| "logps/rejected": -309.3500061035156, | |
| "loss": 0.4555, | |
| "rewards/accuracies": 0.7549999952316284, | |
| "rewards/chosen": -0.8130224347114563, | |
| "rewards/margins": 0.9434008598327637, | |
| "rewards/rejected": -1.7563867568969727, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.5238451188518463, | |
| "grad_norm": 63.75, | |
| "learning_rate": 2.7337173238812586e-06, | |
| "logits/chosen": -1.2015457153320312, | |
| "logits/rejected": -0.8530246019363403, | |
| "logps/chosen": -309.01251220703125, | |
| "logps/rejected": -297.7825012207031, | |
| "loss": 0.4501, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.836810290813446, | |
| "rewards/margins": 0.9292749166488647, | |
| "rewards/rejected": -1.7654907703399658, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 1.5537449544027506, | |
| "grad_norm": 67.0, | |
| "learning_rate": 2.6783340717766948e-06, | |
| "logits/chosen": -1.2457306385040283, | |
| "logits/rejected": -1.0591107606887817, | |
| "logps/chosen": -337.9775085449219, | |
| "logps/rejected": -308.5375061035156, | |
| "loss": 0.4248, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.7735278606414795, | |
| "rewards/margins": 1.035646915435791, | |
| "rewards/rejected": -1.8087304830551147, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.5836447899536552, | |
| "grad_norm": 51.0, | |
| "learning_rate": 2.6229508196721314e-06, | |
| "logits/chosen": -1.216982126235962, | |
| "logits/rejected": -0.8925817608833313, | |
| "logps/chosen": -333.2349853515625, | |
| "logps/rejected": -316.62249755859375, | |
| "loss": 0.4568, | |
| "rewards/accuracies": 0.7850000262260437, | |
| "rewards/chosen": -0.8274877667427063, | |
| "rewards/margins": 0.9530566334724426, | |
| "rewards/rejected": -1.7805664539337158, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 1.6135446255045598, | |
| "grad_norm": 82.0, | |
| "learning_rate": 2.5675675675675675e-06, | |
| "logits/chosen": -1.3132140636444092, | |
| "logits/rejected": -1.004296898841858, | |
| "logps/chosen": -342.4949951171875, | |
| "logps/rejected": -317.69500732421875, | |
| "loss": 0.429, | |
| "rewards/accuracies": 0.8050000071525574, | |
| "rewards/chosen": -0.9008423089981079, | |
| "rewards/margins": 1.0281542539596558, | |
| "rewards/rejected": -1.9285448789596558, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6434444610554642, | |
| "grad_norm": 116.5, | |
| "learning_rate": 2.5121843154630045e-06, | |
| "logits/chosen": -1.1408294439315796, | |
| "logits/rejected": -0.9321377277374268, | |
| "logps/chosen": -335.291259765625, | |
| "logps/rejected": -321.29376220703125, | |
| "loss": 0.453, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.8236993551254272, | |
| "rewards/margins": 0.9510498046875, | |
| "rewards/rejected": -1.77447509765625, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 1.6733442966063685, | |
| "grad_norm": 91.0, | |
| "learning_rate": 2.4568010633584403e-06, | |
| "logits/chosen": -1.1858936548233032, | |
| "logits/rejected": -0.9579010009765625, | |
| "logps/chosen": -320.9949951171875, | |
| "logps/rejected": -296.3374938964844, | |
| "loss": 0.4699, | |
| "rewards/accuracies": 0.7425000071525574, | |
| "rewards/chosen": -0.8678625226020813, | |
| "rewards/margins": 0.9215136766433716, | |
| "rewards/rejected": -1.7896509170532227, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.6733442966063685, | |
| "eval_logits/chosen": -1.1674253940582275, | |
| "eval_logits/rejected": -1.0171688795089722, | |
| "eval_logps/chosen": -327.3978576660156, | |
| "eval_logps/rejected": -306.6209716796875, | |
| "eval_loss": 0.5191056728363037, | |
| "eval_rewards/accuracies": 0.6933563947677612, | |
| "eval_rewards/chosen": -0.8476693630218506, | |
| "eval_rewards/margins": 0.7431673407554626, | |
| "eval_rewards/rejected": -1.5906811952590942, | |
| "eval_runtime": 876.3262, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.703244132157273, | |
| "grad_norm": 82.0, | |
| "learning_rate": 2.401417811253877e-06, | |
| "logits/chosen": -1.1833282709121704, | |
| "logits/rejected": -0.9263910055160522, | |
| "logps/chosen": -324.5150146484375, | |
| "logps/rejected": -316.1650085449219, | |
| "loss": 0.451, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": -0.8199084401130676, | |
| "rewards/margins": 0.9980810284614563, | |
| "rewards/rejected": -1.8175097703933716, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 1.7331439677081777, | |
| "grad_norm": 99.0, | |
| "learning_rate": 2.3460345591493135e-06, | |
| "logits/chosen": -1.1936352252960205, | |
| "logits/rejected": -1.0041576623916626, | |
| "logps/chosen": -350.885009765625, | |
| "logps/rejected": -327.0450134277344, | |
| "loss": 0.4702, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9122155904769897, | |
| "rewards/margins": 0.9335852265357971, | |
| "rewards/rejected": -1.8462109565734863, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.763043803259082, | |
| "grad_norm": 59.5, | |
| "learning_rate": 2.2906513070447496e-06, | |
| "logits/chosen": -1.3379946947097778, | |
| "logits/rejected": -1.0853075981140137, | |
| "logps/chosen": -299.1099853515625, | |
| "logps/rejected": -299.9725036621094, | |
| "loss": 0.4607, | |
| "rewards/accuracies": 0.7850000262260437, | |
| "rewards/chosen": -0.905989408493042, | |
| "rewards/margins": 1.0363476276397705, | |
| "rewards/rejected": -1.942041039466858, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 1.7929436388099864, | |
| "grad_norm": 102.0, | |
| "learning_rate": 2.235268054940186e-06, | |
| "logits/chosen": -1.1545830965042114, | |
| "logits/rejected": -0.8675525188446045, | |
| "logps/chosen": -321.79998779296875, | |
| "logps/rejected": -300.4262390136719, | |
| "loss": 0.4854, | |
| "rewards/accuracies": 0.7425000071525574, | |
| "rewards/chosen": -0.8690832257270813, | |
| "rewards/margins": 0.9056127667427063, | |
| "rewards/rejected": -1.7749096155166626, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.822843474360891, | |
| "grad_norm": 60.0, | |
| "learning_rate": 2.179884802835623e-06, | |
| "logits/chosen": -1.2606717348098755, | |
| "logits/rejected": -1.0567920207977295, | |
| "logps/chosen": -328.82501220703125, | |
| "logps/rejected": -304.1050109863281, | |
| "loss": 0.4552, | |
| "rewards/accuracies": 0.7850000262260437, | |
| "rewards/chosen": -0.743670642375946, | |
| "rewards/margins": 1.0134960412979126, | |
| "rewards/rejected": -1.7573193311691284, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 1.8527433099117956, | |
| "grad_norm": 59.5, | |
| "learning_rate": 2.124501550731059e-06, | |
| "logits/chosen": -1.2121707201004028, | |
| "logits/rejected": -1.002629041671753, | |
| "logps/chosen": -323.5950012207031, | |
| "logps/rejected": -317.5299987792969, | |
| "loss": 0.4645, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -0.9758337140083313, | |
| "rewards/margins": 0.9835278391838074, | |
| "rewards/rejected": -1.959287166595459, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.8826431454627, | |
| "grad_norm": 71.0, | |
| "learning_rate": 2.0691182986264955e-06, | |
| "logits/chosen": -1.296298861503601, | |
| "logits/rejected": null, | |
| "logps/chosen": -325.7699890136719, | |
| "logps/rejected": -299.322509765625, | |
| "loss": 0.4515, | |
| "rewards/accuracies": 0.7599999904632568, | |
| "rewards/chosen": -0.8331592082977295, | |
| "rewards/margins": 0.9821679592132568, | |
| "rewards/rejected": -1.8158252239227295, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 1.9125429810136043, | |
| "grad_norm": 70.0, | |
| "learning_rate": 2.0137350465219317e-06, | |
| "logits/chosen": -1.2260925769805908, | |
| "logits/rejected": -0.9426334500312805, | |
| "logps/chosen": -330.06500244140625, | |
| "logps/rejected": -309.68499755859375, | |
| "loss": 0.4436, | |
| "rewards/accuracies": 0.7649999856948853, | |
| "rewards/chosen": -0.830242931842804, | |
| "rewards/margins": 0.9743407964706421, | |
| "rewards/rejected": -1.804931640625, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.9125429810136043, | |
| "eval_logits/chosen": -1.1829742193222046, | |
| "eval_logits/rejected": -1.033914566040039, | |
| "eval_logps/chosen": -327.43011474609375, | |
| "eval_logps/rejected": -306.69085693359375, | |
| "eval_loss": 0.5206477046012878, | |
| "eval_rewards/accuracies": 0.6974846720695496, | |
| "eval_rewards/chosen": -0.8544062376022339, | |
| "eval_rewards/margins": 0.7440763115882874, | |
| "eval_rewards/rejected": -1.598265290260315, | |
| "eval_runtime": 876.3416, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.942442816564509, | |
| "grad_norm": 73.5, | |
| "learning_rate": 1.9583517944173683e-06, | |
| "logits/chosen": -1.246303677558899, | |
| "logits/rejected": -0.9357275366783142, | |
| "logps/chosen": -332.3599853515625, | |
| "logps/rejected": -309.1700134277344, | |
| "loss": 0.4702, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.8381909132003784, | |
| "rewards/margins": 0.9997217059135437, | |
| "rewards/rejected": -1.837497591972351, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 1.9723426521154135, | |
| "grad_norm": 68.5, | |
| "learning_rate": 1.9029685423128047e-06, | |
| "logits/chosen": -1.2618129253387451, | |
| "logits/rejected": -1.0779250860214233, | |
| "logps/chosen": -339.9324951171875, | |
| "logps/rejected": -318.04998779296875, | |
| "loss": 0.4583, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.8390514850616455, | |
| "rewards/margins": 1.0396826267242432, | |
| "rewards/rejected": -1.878564476966858, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.0011959934220362, | |
| "grad_norm": 97.0, | |
| "learning_rate": 1.847585290208241e-06, | |
| "logits/chosen": -1.2342288494110107, | |
| "logits/rejected": -0.9683116674423218, | |
| "logps/chosen": -332.2409362792969, | |
| "logps/rejected": -321.0531005859375, | |
| "loss": 0.424, | |
| "rewards/accuracies": 0.7642487287521362, | |
| "rewards/chosen": -0.7630558013916016, | |
| "rewards/margins": 1.0779491662979126, | |
| "rewards/rejected": -1.8409063816070557, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.0310958289729406, | |
| "grad_norm": 76.0, | |
| "learning_rate": 1.7922020381036776e-06, | |
| "logits/chosen": -1.318371295928955, | |
| "logits/rejected": -1.0083489418029785, | |
| "logps/chosen": -327.114990234375, | |
| "logps/rejected": -336.697509765625, | |
| "loss": 0.3965, | |
| "rewards/accuracies": 0.8475000262260437, | |
| "rewards/chosen": -0.7496582269668579, | |
| "rewards/margins": 1.0661474466323853, | |
| "rewards/rejected": -1.8159960508346558, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.060995664523845, | |
| "grad_norm": 102.5, | |
| "learning_rate": 1.736818785999114e-06, | |
| "logits/chosen": -1.2396435737609863, | |
| "logits/rejected": -0.9828730225563049, | |
| "logps/chosen": -332.7074890136719, | |
| "logps/rejected": -333.37249755859375, | |
| "loss": 0.4101, | |
| "rewards/accuracies": 0.8149999976158142, | |
| "rewards/chosen": -0.7449682354927063, | |
| "rewards/margins": 1.1290674209594727, | |
| "rewards/rejected": -1.8738598823547363, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.09089550007475, | |
| "grad_norm": 62.25, | |
| "learning_rate": 1.6814355338945504e-06, | |
| "logits/chosen": -1.2273823022842407, | |
| "logits/rejected": -0.88829505443573, | |
| "logps/chosen": -322.93499755859375, | |
| "logps/rejected": -300.385009765625, | |
| "loss": 0.4221, | |
| "rewards/accuracies": 0.8050000071525574, | |
| "rewards/chosen": -0.903369128704071, | |
| "rewards/margins": 1.0416357517242432, | |
| "rewards/rejected": -1.9447948932647705, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.120795335625654, | |
| "grad_norm": 86.5, | |
| "learning_rate": 1.6260522817899868e-06, | |
| "logits/chosen": -1.2524548768997192, | |
| "logits/rejected": -1.0671484470367432, | |
| "logps/chosen": -333.92999267578125, | |
| "logps/rejected": -318.6400146484375, | |
| "loss": 0.4119, | |
| "rewards/accuracies": 0.8149999976158142, | |
| "rewards/chosen": -0.7944982647895813, | |
| "rewards/margins": 1.1625818014144897, | |
| "rewards/rejected": -1.9566112756729126, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 2.1506951711765585, | |
| "grad_norm": 90.0, | |
| "learning_rate": 1.5706690296854231e-06, | |
| "logits/chosen": -1.2237915992736816, | |
| "logits/rejected": -0.956585705280304, | |
| "logps/chosen": -320.30999755859375, | |
| "logps/rejected": -302.2674865722656, | |
| "loss": 0.4528, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -0.9091894626617432, | |
| "rewards/margins": 1.0250316858291626, | |
| "rewards/rejected": -1.9344677925109863, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.1506951711765585, | |
| "eval_logits/chosen": -1.191327452659607, | |
| "eval_logits/rejected": -1.0433924198150635, | |
| "eval_logps/chosen": -327.741943359375, | |
| "eval_logps/rejected": -307.1559143066406, | |
| "eval_loss": 0.5188325047492981, | |
| "eval_rewards/accuracies": 0.6941244602203369, | |
| "eval_rewards/chosen": -0.8884723782539368, | |
| "eval_rewards/margins": 0.7567348480224609, | |
| "eval_rewards/rejected": -1.6454237699508667, | |
| "eval_runtime": 876.3236, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.180595006727463, | |
| "grad_norm": 74.5, | |
| "learning_rate": 1.5152857775808597e-06, | |
| "logits/chosen": -1.2849377393722534, | |
| "logits/rejected": -0.9589782953262329, | |
| "logps/chosen": -321.9987487792969, | |
| "logps/rejected": -307.2149963378906, | |
| "loss": 0.4031, | |
| "rewards/accuracies": 0.8349999785423279, | |
| "rewards/chosen": -0.7700170874595642, | |
| "rewards/margins": 1.1218103170394897, | |
| "rewards/rejected": -1.8917040824890137, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 2.2104948422783677, | |
| "grad_norm": 73.5, | |
| "learning_rate": 1.459902525476296e-06, | |
| "logits/chosen": -1.136842131614685, | |
| "logits/rejected": -0.9383144974708557, | |
| "logps/chosen": -319.8525085449219, | |
| "logps/rejected": -333.6600036621094, | |
| "loss": 0.424, | |
| "rewards/accuracies": 0.8075000047683716, | |
| "rewards/chosen": -0.8708154559135437, | |
| "rewards/margins": 1.0324267148971558, | |
| "rewards/rejected": -1.903378963470459, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.240394677829272, | |
| "grad_norm": 72.5, | |
| "learning_rate": 1.4045192733717325e-06, | |
| "logits/chosen": -1.1802786588668823, | |
| "logits/rejected": -0.9680548310279846, | |
| "logps/chosen": -317.48748779296875, | |
| "logps/rejected": -299.19000244140625, | |
| "loss": 0.4262, | |
| "rewards/accuracies": 0.8274999856948853, | |
| "rewards/chosen": -0.8513085842132568, | |
| "rewards/margins": 1.0704809427261353, | |
| "rewards/rejected": -1.9216357469558716, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 2.2702945133801764, | |
| "grad_norm": 84.0, | |
| "learning_rate": 1.3491360212671688e-06, | |
| "logits/chosen": -1.2559946775436401, | |
| "logits/rejected": -0.9639026522636414, | |
| "logps/chosen": -336.9750061035156, | |
| "logps/rejected": -323.49249267578125, | |
| "loss": 0.4294, | |
| "rewards/accuracies": 0.8025000095367432, | |
| "rewards/chosen": -0.8724609613418579, | |
| "rewards/margins": 1.0881787538528442, | |
| "rewards/rejected": -1.960756778717041, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.3001943489310808, | |
| "grad_norm": 71.0, | |
| "learning_rate": 1.2937527691626054e-06, | |
| "logits/chosen": -1.3266677856445312, | |
| "logits/rejected": -1.0626074075698853, | |
| "logps/chosen": -305.86749267578125, | |
| "logps/rejected": -291.93499755859375, | |
| "loss": 0.4471, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.9192346334457397, | |
| "rewards/margins": 1.0141992568969727, | |
| "rewards/rejected": -1.9337549209594727, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 2.330094184481985, | |
| "grad_norm": 109.5, | |
| "learning_rate": 1.2383695170580418e-06, | |
| "logits/chosen": -1.1726070642471313, | |
| "logits/rejected": -1.0060466527938843, | |
| "logps/chosen": -309.7799987792969, | |
| "logps/rejected": -311.13751220703125, | |
| "loss": 0.4333, | |
| "rewards/accuracies": 0.7724999785423279, | |
| "rewards/chosen": -0.8455395698547363, | |
| "rewards/margins": 1.0642285346984863, | |
| "rewards/rejected": -1.9100537300109863, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.35999402003289, | |
| "grad_norm": 43.0, | |
| "learning_rate": 1.1829862649534782e-06, | |
| "logits/chosen": -1.189868450164795, | |
| "logits/rejected": -1.0110809803009033, | |
| "logps/chosen": -343.5849914550781, | |
| "logps/rejected": -329.1675109863281, | |
| "loss": 0.4071, | |
| "rewards/accuracies": 0.8224999904632568, | |
| "rewards/chosen": -0.8902783393859863, | |
| "rewards/margins": 1.0464379787445068, | |
| "rewards/rejected": -1.9371508359909058, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 2.3898938555837943, | |
| "grad_norm": 86.5, | |
| "learning_rate": 1.1276030128489146e-06, | |
| "logits/chosen": -1.3213348388671875, | |
| "logits/rejected": -1.0948954820632935, | |
| "logps/chosen": -331.0174865722656, | |
| "logps/rejected": -307.2900085449219, | |
| "loss": 0.4075, | |
| "rewards/accuracies": 0.8349999785423279, | |
| "rewards/chosen": -0.8052575588226318, | |
| "rewards/margins": 1.1002050638198853, | |
| "rewards/rejected": -1.9058740139007568, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.3898938555837943, | |
| "eval_logits/chosen": -1.1904795169830322, | |
| "eval_logits/rejected": -1.042686104774475, | |
| "eval_logps/chosen": -327.67205810546875, | |
| "eval_logps/rejected": -307.0806579589844, | |
| "eval_loss": 0.5186262726783752, | |
| "eval_rewards/accuracies": 0.6967166662216187, | |
| "eval_rewards/chosen": -0.8813358545303345, | |
| "eval_rewards/margins": 0.7553303837776184, | |
| "eval_rewards/rejected": -1.6366767883300781, | |
| "eval_runtime": 876.3711, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.4197936911346987, | |
| "grad_norm": 67.0, | |
| "learning_rate": 1.072219760744351e-06, | |
| "logits/chosen": -1.2627320289611816, | |
| "logits/rejected": -1.0026310682296753, | |
| "logps/chosen": -335.5675048828125, | |
| "logps/rejected": -301.01251220703125, | |
| "loss": 0.4202, | |
| "rewards/accuracies": 0.7774999737739563, | |
| "rewards/chosen": -0.8969201445579529, | |
| "rewards/margins": 1.085205078125, | |
| "rewards/rejected": -1.9821679592132568, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 2.4496935266856035, | |
| "grad_norm": 86.0, | |
| "learning_rate": 1.0168365086397875e-06, | |
| "logits/chosen": -1.2463324069976807, | |
| "logits/rejected": -0.9855798482894897, | |
| "logps/chosen": -332.5849914550781, | |
| "logps/rejected": -324.9624938964844, | |
| "loss": 0.4193, | |
| "rewards/accuracies": 0.7925000190734863, | |
| "rewards/chosen": -0.8326050043106079, | |
| "rewards/margins": 1.0910131931304932, | |
| "rewards/rejected": -1.9229882955551147, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.479593362236508, | |
| "grad_norm": 53.75, | |
| "learning_rate": 9.61453256535224e-07, | |
| "logits/chosen": -1.2372454404830933, | |
| "logits/rejected": -0.9461462497711182, | |
| "logps/chosen": -328.4750061035156, | |
| "logps/rejected": -300.5224914550781, | |
| "loss": 0.4611, | |
| "rewards/accuracies": 0.7524999976158142, | |
| "rewards/chosen": -0.8591150045394897, | |
| "rewards/margins": 0.9913061261177063, | |
| "rewards/rejected": -1.8506054878234863, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 2.509493197787412, | |
| "grad_norm": 68.0, | |
| "learning_rate": 9.060700044306603e-07, | |
| "logits/chosen": -1.2847473621368408, | |
| "logits/rejected": -1.0720292329788208, | |
| "logps/chosen": -337.26251220703125, | |
| "logps/rejected": -307.17498779296875, | |
| "loss": 0.4101, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": -0.8909338116645813, | |
| "rewards/margins": 1.1306884288787842, | |
| "rewards/rejected": -2.021728515625, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.5393930333383166, | |
| "grad_norm": 101.0, | |
| "learning_rate": 8.506867523260968e-07, | |
| "logits/chosen": -1.1994116306304932, | |
| "logits/rejected": -0.9730746746063232, | |
| "logps/chosen": -338.3999938964844, | |
| "logps/rejected": -304.99749755859375, | |
| "loss": 0.4387, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.7841222882270813, | |
| "rewards/margins": 1.0449267625808716, | |
| "rewards/rejected": -1.829746127128601, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 2.569292868889221, | |
| "grad_norm": 68.5, | |
| "learning_rate": 7.953035002215331e-07, | |
| "logits/chosen": -1.3298254013061523, | |
| "logits/rejected": -1.118627667427063, | |
| "logps/chosen": -309.739990234375, | |
| "logps/rejected": -308.24749755859375, | |
| "loss": 0.4449, | |
| "rewards/accuracies": 0.7774999737739563, | |
| "rewards/chosen": -0.8520336747169495, | |
| "rewards/margins": 0.9700658917427063, | |
| "rewards/rejected": -1.8218945264816284, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.5991927044401257, | |
| "grad_norm": 70.5, | |
| "learning_rate": 7.399202481169695e-07, | |
| "logits/chosen": -1.1831958293914795, | |
| "logits/rejected": null, | |
| "logps/chosen": -327.49249267578125, | |
| "logps/rejected": -289.5924987792969, | |
| "loss": 0.4473, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.8408032059669495, | |
| "rewards/margins": 0.9420214891433716, | |
| "rewards/rejected": -1.7829101085662842, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 2.62909253999103, | |
| "grad_norm": 54.0, | |
| "learning_rate": 6.845369960124059e-07, | |
| "logits/chosen": -1.2656641006469727, | |
| "logits/rejected": -0.9782373309135437, | |
| "logps/chosen": -324.4200134277344, | |
| "logps/rejected": -290.0675048828125, | |
| "loss": 0.4419, | |
| "rewards/accuracies": 0.7825000286102295, | |
| "rewards/chosen": -0.9666149616241455, | |
| "rewards/margins": 1.0030114650726318, | |
| "rewards/rejected": -1.9694628715515137, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.62909253999103, | |
| "eval_logits/chosen": -1.1868830919265747, | |
| "eval_logits/rejected": -1.0399714708328247, | |
| "eval_logps/chosen": -327.6585998535156, | |
| "eval_logps/rejected": -306.9704284667969, | |
| "eval_loss": 0.5178263783454895, | |
| "eval_rewards/accuracies": 0.6993087530136108, | |
| "eval_rewards/chosen": -0.8778404593467712, | |
| "eval_rewards/margins": 0.7548588514328003, | |
| "eval_rewards/rejected": -1.6324502229690552, | |
| "eval_runtime": 876.3727, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.6589923755419345, | |
| "grad_norm": 67.5, | |
| "learning_rate": 6.291537439078423e-07, | |
| "logits/chosen": -1.2253618240356445, | |
| "logits/rejected": -1.0349105596542358, | |
| "logps/chosen": -336.12249755859375, | |
| "logps/rejected": -311.8275146484375, | |
| "loss": 0.4574, | |
| "rewards/accuracies": 0.7724999785423279, | |
| "rewards/chosen": -0.8752642869949341, | |
| "rewards/margins": 0.9961340427398682, | |
| "rewards/rejected": -1.8713818788528442, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 2.688892211092839, | |
| "grad_norm": 100.0, | |
| "learning_rate": 5.737704918032787e-07, | |
| "logits/chosen": -1.2597771883010864, | |
| "logits/rejected": -0.9909564256668091, | |
| "logps/chosen": -326.6600036621094, | |
| "logps/rejected": -316.19000244140625, | |
| "loss": 0.4751, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -0.9248193502426147, | |
| "rewards/margins": 0.9592040777206421, | |
| "rewards/rejected": -1.8837096691131592, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.7187920466437436, | |
| "grad_norm": 76.0, | |
| "learning_rate": 5.183872396987152e-07, | |
| "logits/chosen": -1.2072705030441284, | |
| "logits/rejected": -0.9592925906181335, | |
| "logps/chosen": -322.36248779296875, | |
| "logps/rejected": -315.8374938964844, | |
| "loss": 0.391, | |
| "rewards/accuracies": 0.8274999856948853, | |
| "rewards/chosen": -0.7576141357421875, | |
| "rewards/margins": 1.160730004310608, | |
| "rewards/rejected": -1.9182031154632568, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 2.748691882194648, | |
| "grad_norm": 53.0, | |
| "learning_rate": 4.630039875941516e-07, | |
| "logits/chosen": -1.287199854850769, | |
| "logits/rejected": -0.9606054425239563, | |
| "logps/chosen": -344.7650146484375, | |
| "logps/rejected": -331.24749755859375, | |
| "loss": 0.4177, | |
| "rewards/accuracies": 0.8149999976158142, | |
| "rewards/chosen": -0.7748047113418579, | |
| "rewards/margins": 1.1645703315734863, | |
| "rewards/rejected": -1.9394140243530273, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.7785917177455524, | |
| "grad_norm": 87.0, | |
| "learning_rate": 4.07620735489588e-07, | |
| "logits/chosen": -1.2260528802871704, | |
| "logits/rejected": -1.0005972385406494, | |
| "logps/chosen": -312.9624938964844, | |
| "logps/rejected": -323.0400085449219, | |
| "loss": 0.3917, | |
| "rewards/accuracies": 0.8349999785423279, | |
| "rewards/chosen": -0.7925238013267517, | |
| "rewards/margins": 1.185449242591858, | |
| "rewards/rejected": -1.9780443906784058, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 2.8084915532964567, | |
| "grad_norm": 56.5, | |
| "learning_rate": 3.5223748338502434e-07, | |
| "logits/chosen": -1.2027392387390137, | |
| "logits/rejected": -0.989107608795166, | |
| "logps/chosen": -321.3762512207031, | |
| "logps/rejected": -318.11749267578125, | |
| "loss": 0.4052, | |
| "rewards/accuracies": 0.8174999952316284, | |
| "rewards/chosen": -0.8751891851425171, | |
| "rewards/margins": 1.1021533012390137, | |
| "rewards/rejected": -1.976718783378601, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.838391388847361, | |
| "grad_norm": 54.5, | |
| "learning_rate": 2.968542312804608e-07, | |
| "logits/chosen": -1.2425882816314697, | |
| "logits/rejected": -0.9340093731880188, | |
| "logps/chosen": -335.12249755859375, | |
| "logps/rejected": -320.2049865722656, | |
| "loss": 0.4115, | |
| "rewards/accuracies": 0.8224999904632568, | |
| "rewards/chosen": -0.8292675614356995, | |
| "rewards/margins": 1.1182934045791626, | |
| "rewards/rejected": -1.9483104944229126, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 2.868291224398266, | |
| "grad_norm": 87.0, | |
| "learning_rate": 2.4147097917589725e-07, | |
| "logits/chosen": -1.3012477159500122, | |
| "logits/rejected": -1.0664279460906982, | |
| "logps/chosen": -293.489990234375, | |
| "logps/rejected": -285.197509765625, | |
| "loss": 0.4277, | |
| "rewards/accuracies": 0.8025000095367432, | |
| "rewards/chosen": -0.8684576153755188, | |
| "rewards/margins": 1.069272518157959, | |
| "rewards/rejected": -1.9371191263198853, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.868291224398266, | |
| "eval_logits/chosen": -1.1853525638580322, | |
| "eval_logits/rejected": -1.0373817682266235, | |
| "eval_logps/chosen": -327.3817138671875, | |
| "eval_logps/rejected": -306.81451416015625, | |
| "eval_loss": 0.5165102481842041, | |
| "eval_rewards/accuracies": 0.7006528377532959, | |
| "eval_rewards/chosen": -0.8549529314041138, | |
| "eval_rewards/margins": 0.7583125829696655, | |
| "eval_rewards/rejected": -1.6133127212524414, | |
| "eval_runtime": 876.3322, | |
| "eval_samples_per_second": 1.697, | |
| "eval_steps_per_second": 0.212, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.8981910599491703, | |
| "grad_norm": 46.0, | |
| "learning_rate": 1.8608772707133363e-07, | |
| "logits/chosen": -1.356745958328247, | |
| "logits/rejected": -1.0496530532836914, | |
| "logps/chosen": -319.9649963378906, | |
| "logps/rejected": -309.7025146484375, | |
| "loss": 0.4037, | |
| "rewards/accuracies": 0.8025000095367432, | |
| "rewards/chosen": -0.8254479765892029, | |
| "rewards/margins": 1.1192578077316284, | |
| "rewards/rejected": -1.9445117712020874, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 2.9280908955000746, | |
| "grad_norm": 70.5, | |
| "learning_rate": 1.3070447496677006e-07, | |
| "logits/chosen": -1.2751880884170532, | |
| "logits/rejected": -1.0796799659729004, | |
| "logps/chosen": -316.9425048828125, | |
| "logps/rejected": -325.7550048828125, | |
| "loss": 0.4306, | |
| "rewards/accuracies": 0.7724999785423279, | |
| "rewards/chosen": -0.8079773187637329, | |
| "rewards/margins": 1.000207543373108, | |
| "rewards/rejected": -1.8083984851837158, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.9579907310509794, | |
| "grad_norm": 74.0, | |
| "learning_rate": 7.532122286220647e-08, | |
| "logits/chosen": -1.2595221996307373, | |
| "logits/rejected": -1.0140166282653809, | |
| "logps/chosen": -320.6000061035156, | |
| "logps/rejected": -318.6600036621094, | |
| "loss": 0.4808, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -1.000390648841858, | |
| "rewards/margins": 0.931530773639679, | |
| "rewards/rejected": -1.9319677352905273, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 2.987890566601884, | |
| "grad_norm": 78.5, | |
| "learning_rate": 1.993797075764289e-08, | |
| "logits/chosen": -1.2403491735458374, | |
| "logits/rejected": -0.9544309973716736, | |
| "logps/chosen": -343.76251220703125, | |
| "logps/rejected": -336.38250732421875, | |
| "loss": 0.4225, | |
| "rewards/accuracies": 0.8149999976158142, | |
| "rewards/chosen": -0.7856341600418091, | |
| "rewards/margins": 1.0573632717132568, | |
| "rewards/rejected": -1.8428466320037842, | |
| "step": 2500 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 2508, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |