| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 1000, | |
| "global_step": 1250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4e-09, | |
| "logits/chosen": -0.47322866320610046, | |
| "logits/rejected": -0.3592185378074646, | |
| "logps/chosen": -237.07044982910156, | |
| "logps/rejected": -252.6734161376953, | |
| "loss": 0.691, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.00030384131241589785, | |
| "rewards/margins": 0.011311720125377178, | |
| "rewards/mix_margin": 0.011311912909150124, | |
| "rewards/rejected": -0.011615562252700329, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4e-08, | |
| "logits/chosen": -0.5430262088775635, | |
| "logits/rejected": -0.529547393321991, | |
| "logps/chosen": -268.2706604003906, | |
| "logps/rejected": -278.84808349609375, | |
| "loss": 0.6969, | |
| "rewards/accuracies": 0.3888888955116272, | |
| "rewards/chosen": -0.004503441508859396, | |
| "rewards/confidence": -0.12590448558330536, | |
| "rewards/confidence_mean_diff": 0.12590448558330536, | |
| "rewards/confidence_moving_diff": 0.003324742428958416, | |
| "rewards/margins": -0.01026806328445673, | |
| "rewards/mix_margin": -0.01026806328445673, | |
| "rewards/real_percentage": 13.161290168762207, | |
| "rewards/rejected": 0.005764622241258621, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 8e-08, | |
| "logits/chosen": -0.5056057572364807, | |
| "logits/rejected": -0.4968181252479553, | |
| "logps/chosen": -255.50906372070312, | |
| "logps/rejected": -285.2353820800781, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.004500885494053364, | |
| "rewards/confidence": -0.10827472060918808, | |
| "rewards/confidence_mean_diff": 0.10827472060918808, | |
| "rewards/confidence_moving_diff": 9.412765211891383e-05, | |
| "rewards/margins": 0.004025735892355442, | |
| "rewards/mix_margin": 0.0040255356580019, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -0.008526620455086231, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2e-07, | |
| "logits/chosen": -0.47794079780578613, | |
| "logits/rejected": -0.46892619132995605, | |
| "logps/chosen": -255.41366577148438, | |
| "logps/rejected": -270.9418640136719, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.027412116527557373, | |
| "rewards/confidence": -0.08519235253334045, | |
| "rewards/confidence_mean_diff": 0.08519235253334045, | |
| "rewards/confidence_moving_diff": -0.00016973493620753288, | |
| "rewards/margins": 0.02382436767220497, | |
| "rewards/mix_margin": 0.023824330419301987, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": -0.05123649165034294, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.6e-07, | |
| "logits/chosen": -0.6560722589492798, | |
| "logits/rejected": -0.6274336576461792, | |
| "logps/chosen": -254.38381958007812, | |
| "logps/rejected": -273.35894775390625, | |
| "loss": 0.6725, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.07497148215770721, | |
| "rewards/confidence": -0.1010628491640091, | |
| "rewards/confidence_mean_diff": 0.1010628491640091, | |
| "rewards/confidence_moving_diff": -2.9067974537611008e-05, | |
| "rewards/margins": 0.06277047842741013, | |
| "rewards/mix_margin": 0.06277050077915192, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -0.13774196803569794, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -0.5598307847976685, | |
| "logits/rejected": -0.543168306350708, | |
| "logps/chosen": -255.5590362548828, | |
| "logps/rejected": -283.821533203125, | |
| "loss": 0.6211, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.18795588612556458, | |
| "rewards/confidence": -0.14614970982074738, | |
| "rewards/confidence_mean_diff": 0.14614970982074738, | |
| "rewards/confidence_moving_diff": 0.0007611083565279841, | |
| "rewards/margins": 0.1645088642835617, | |
| "rewards/mix_margin": 0.16450893878936768, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -0.3524647653102875, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.4e-07, | |
| "logits/chosen": -0.5678125619888306, | |
| "logits/rejected": -0.5200175642967224, | |
| "logps/chosen": -267.771484375, | |
| "logps/rejected": -292.8956604003906, | |
| "loss": 0.5978, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.44606050848960876, | |
| "rewards/confidence": -0.23100724816322327, | |
| "rewards/confidence_mean_diff": 0.23100724816322327, | |
| "rewards/confidence_moving_diff": 0.0013331411173567176, | |
| "rewards/margins": 0.3294847905635834, | |
| "rewards/mix_margin": 0.32948458194732666, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -0.7755452990531921, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.8e-07, | |
| "logits/chosen": -0.5431959629058838, | |
| "logits/rejected": -0.5101473331451416, | |
| "logps/chosen": -261.19873046875, | |
| "logps/rejected": -290.13836669921875, | |
| "loss": 0.599, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6963601112365723, | |
| "rewards/confidence": -0.28327494859695435, | |
| "rewards/confidence_mean_diff": 0.28327494859695435, | |
| "rewards/confidence_moving_diff": 0.0003418736159801483, | |
| "rewards/margins": 0.44483843445777893, | |
| "rewards/mix_margin": 0.44483891129493713, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -1.1411985158920288, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.2e-07, | |
| "logits/chosen": -0.4392545223236084, | |
| "logits/rejected": -0.40478867292404175, | |
| "logps/chosen": -262.34808349609375, | |
| "logps/rejected": -277.52520751953125, | |
| "loss": 0.5747, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.6036010384559631, | |
| "rewards/confidence": -0.3173912465572357, | |
| "rewards/confidence_mean_diff": 0.3173912465572357, | |
| "rewards/confidence_moving_diff": 0.001234588329680264, | |
| "rewards/margins": 0.3477805256843567, | |
| "rewards/mix_margin": 0.34778040647506714, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -0.951381504535675, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.6e-07, | |
| "logits/chosen": -0.42480334639549255, | |
| "logits/rejected": -0.38244059681892395, | |
| "logps/chosen": -267.52569580078125, | |
| "logps/rejected": -289.1376953125, | |
| "loss": 0.5059, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.38540148735046387, | |
| "rewards/confidence": -0.3338642716407776, | |
| "rewards/confidence_mean_diff": 0.3338642716407776, | |
| "rewards/confidence_moving_diff": -0.00127419491764158, | |
| "rewards/margins": 0.6211220026016235, | |
| "rewards/mix_margin": 0.621121883392334, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -1.0065234899520874, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -0.6353573203086853, | |
| "logits/rejected": -0.5929123163223267, | |
| "logps/chosen": -275.74505615234375, | |
| "logps/rejected": -290.1647033691406, | |
| "loss": 0.5212, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.9032983779907227, | |
| "rewards/confidence": -0.2447614222764969, | |
| "rewards/confidence_mean_diff": 0.2447614222764969, | |
| "rewards/confidence_moving_diff": 0.0010750587098300457, | |
| "rewards/margins": 0.7160919904708862, | |
| "rewards/mix_margin": 0.7160922288894653, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -1.6193904876708984, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "logits/chosen": -0.46968236565589905, | |
| "logits/rejected": -0.4189048707485199, | |
| "logps/chosen": -267.0975341796875, | |
| "logps/rejected": -290.6430358886719, | |
| "loss": 0.5351, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.8663312792778015, | |
| "rewards/confidence": -0.5578163266181946, | |
| "rewards/confidence_mean_diff": 0.5578163266181946, | |
| "rewards/confidence_moving_diff": -0.0013048466062173247, | |
| "rewards/margins": 0.8534231185913086, | |
| "rewards/mix_margin": 0.8534228205680847, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -1.7197544574737549, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8e-07, | |
| "logits/chosen": -0.48265400528907776, | |
| "logits/rejected": -0.42460212111473083, | |
| "logps/chosen": -266.2710876464844, | |
| "logps/rejected": -297.87896728515625, | |
| "loss": 0.4619, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.0156608819961548, | |
| "rewards/confidence": -0.13284577429294586, | |
| "rewards/confidence_mean_diff": 0.13284577429294586, | |
| "rewards/confidence_moving_diff": -0.003854532493278384, | |
| "rewards/margins": 1.2845903635025024, | |
| "rewards/mix_margin": 1.2845900058746338, | |
| "rewards/real_percentage": 11.824999809265137, | |
| "rewards/rejected": -2.3002512454986572, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.99975631002326e-07, | |
| "logits/chosen": -0.47339755296707153, | |
| "logits/rejected": -0.41515836119651794, | |
| "logps/chosen": -266.7010803222656, | |
| "logps/rejected": -290.9997253417969, | |
| "loss": 0.5086, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.4632090330123901, | |
| "rewards/confidence": -0.21441641449928284, | |
| "rewards/confidence_mean_diff": 0.21441641449928284, | |
| "rewards/confidence_moving_diff": 0.005650043487548828, | |
| "rewards/margins": 1.0238463878631592, | |
| "rewards/mix_margin": 1.0238463878631592, | |
| "rewards/real_percentage": 12.274999618530273, | |
| "rewards/rejected": -2.487055540084839, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.997807075247145e-07, | |
| "logits/chosen": -0.5116972923278809, | |
| "logits/rejected": -0.4944287836551666, | |
| "logps/chosen": -268.1426696777344, | |
| "logps/rejected": -309.47283935546875, | |
| "loss": 0.5297, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.6944953203201294, | |
| "rewards/confidence": -0.4855690598487854, | |
| "rewards/confidence_mean_diff": 0.4855690598487854, | |
| "rewards/confidence_moving_diff": 0.005400919821113348, | |
| "rewards/margins": 0.97160804271698, | |
| "rewards/mix_margin": 0.9716082811355591, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -1.6661033630371094, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.99391012564956e-07, | |
| "logits/chosen": -0.4254834055900574, | |
| "logits/rejected": -0.3741452991962433, | |
| "logps/chosen": -263.1833801269531, | |
| "logps/rejected": -287.128662109375, | |
| "loss": 0.4845, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.4041715860366821, | |
| "rewards/confidence": -0.7750243544578552, | |
| "rewards/confidence_mean_diff": 0.7750243544578552, | |
| "rewards/confidence_moving_diff": -0.007257706020027399, | |
| "rewards/margins": 1.168788194656372, | |
| "rewards/mix_margin": 1.168788194656372, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -2.5729594230651855, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.988068499954577e-07, | |
| "logits/chosen": -0.5084344148635864, | |
| "logits/rejected": -0.46291494369506836, | |
| "logps/chosen": -280.2730712890625, | |
| "logps/rejected": -308.90557861328125, | |
| "loss": 0.4799, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.027472734451294, | |
| "rewards/confidence": -0.22910213470458984, | |
| "rewards/confidence_mean_diff": 0.22910213470458984, | |
| "rewards/confidence_moving_diff": 0.0028620243538171053, | |
| "rewards/margins": 1.4721789360046387, | |
| "rewards/mix_margin": 1.4721790552139282, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -3.4996516704559326, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.980286753286194e-07, | |
| "logits/chosen": -0.40486305952072144, | |
| "logits/rejected": -0.37784627079963684, | |
| "logps/chosen": -260.0851135253906, | |
| "logps/rejected": -290.01385498046875, | |
| "loss": 0.4526, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -1.28327476978302, | |
| "rewards/confidence": -0.6308544278144836, | |
| "rewards/confidence_mean_diff": 0.6308544278144836, | |
| "rewards/confidence_moving_diff": 0.0030842300038784742, | |
| "rewards/margins": 1.3978360891342163, | |
| "rewards/mix_margin": 1.3978359699249268, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -2.6811108589172363, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.970570953616382e-07, | |
| "logits/chosen": -0.5191699266433716, | |
| "logits/rejected": -0.4658164381980896, | |
| "logps/chosen": -288.48779296875, | |
| "logps/rejected": -317.05633544921875, | |
| "loss": 0.4383, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -2.648993730545044, | |
| "rewards/confidence": -0.47053903341293335, | |
| "rewards/confidence_mean_diff": 0.47053903341293335, | |
| "rewards/confidence_moving_diff": -0.004885196685791016, | |
| "rewards/margins": 1.3545629978179932, | |
| "rewards/mix_margin": 1.3545629978179932, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -4.003556728363037, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.958928677033465e-07, | |
| "logits/chosen": -0.5680745840072632, | |
| "logits/rejected": -0.5394167304039001, | |
| "logps/chosen": -301.71881103515625, | |
| "logps/rejected": -335.8786315917969, | |
| "loss": 0.4476, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -3.0096700191497803, | |
| "rewards/confidence": -0.4321349561214447, | |
| "rewards/confidence_mean_diff": 0.4321349561214447, | |
| "rewards/confidence_moving_diff": 0.001477098441682756, | |
| "rewards/margins": 1.8966251611709595, | |
| "rewards/mix_margin": 1.8966249227523804, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -4.9062957763671875, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.945369001834514e-07, | |
| "logits/chosen": -0.5155278444290161, | |
| "logits/rejected": -0.4719982147216797, | |
| "logps/chosen": -261.91156005859375, | |
| "logps/rejected": -292.0012512207031, | |
| "loss": 0.5461, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -1.8270504474639893, | |
| "rewards/confidence": -0.6011069416999817, | |
| "rewards/confidence_mean_diff": 0.6011069416999817, | |
| "rewards/confidence_moving_diff": 0.004404354374855757, | |
| "rewards/margins": 1.4701335430145264, | |
| "rewards/mix_margin": 1.4701337814331055, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -3.2971839904785156, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.929902501446366e-07, | |
| "logits/chosen": -0.4085623323917389, | |
| "logits/rejected": -0.3550710082054138, | |
| "logps/chosen": -292.015380859375, | |
| "logps/rejected": -317.65093994140625, | |
| "loss": 0.4305, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.833467483520508, | |
| "rewards/confidence": -0.7786771059036255, | |
| "rewards/confidence_mean_diff": 0.7786771059036255, | |
| "rewards/confidence_moving_diff": -0.0016236401861533523, | |
| "rewards/margins": 1.6545846462249756, | |
| "rewards/mix_margin": 1.6545846462249756, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -4.4880523681640625, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.912541236180778e-07, | |
| "logits/chosen": -0.47047504782676697, | |
| "logits/rejected": -0.4149271547794342, | |
| "logps/chosen": -275.33258056640625, | |
| "logps/rejected": -316.54473876953125, | |
| "loss": 0.4504, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -1.9749810695648193, | |
| "rewards/confidence": -0.41206398606300354, | |
| "rewards/confidence_mean_diff": 0.41206398606300354, | |
| "rewards/confidence_moving_diff": 0.00011900011304533109, | |
| "rewards/margins": 1.932944655418396, | |
| "rewards/mix_margin": 1.932944893836975, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -3.907925844192505, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.893298743830167e-07, | |
| "logits/chosen": -0.37697917222976685, | |
| "logits/rejected": -0.36001843214035034, | |
| "logps/chosen": -276.3026428222656, | |
| "logps/rejected": -314.4491882324219, | |
| "loss": 0.478, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -2.710088014602661, | |
| "rewards/confidence": -0.4186745285987854, | |
| "rewards/confidence_mean_diff": 0.4186745285987854, | |
| "rewards/confidence_moving_diff": -0.008362541906535625, | |
| "rewards/margins": 1.7036726474761963, | |
| "rewards/mix_margin": 1.7036726474761963, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -4.413760662078857, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.872190029111241e-07, | |
| "logits/chosen": -0.45598506927490234, | |
| "logits/rejected": -0.4249623417854309, | |
| "logps/chosen": -284.714599609375, | |
| "logps/rejected": -305.51348876953125, | |
| "loss": 0.525, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -2.376721143722534, | |
| "rewards/confidence": -0.39786866307258606, | |
| "rewards/confidence_mean_diff": 0.39786866307258606, | |
| "rewards/confidence_moving_diff": 0.0037966251838952303, | |
| "rewards/margins": 1.580210566520691, | |
| "rewards/mix_margin": 1.5802103281021118, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -3.9569315910339355, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.849231551964771e-07, | |
| "logits/chosen": -0.3795531094074249, | |
| "logits/rejected": -0.3494294285774231, | |
| "logps/chosen": -282.3592529296875, | |
| "logps/rejected": -326.60516357421875, | |
| "loss": 0.4548, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -2.384575366973877, | |
| "rewards/confidence": -0.36359551548957825, | |
| "rewards/confidence_mean_diff": 0.36359551548957825, | |
| "rewards/confidence_moving_diff": -0.002334222663193941, | |
| "rewards/margins": 1.9443126916885376, | |
| "rewards/mix_margin": 1.9443128108978271, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -4.328887939453125, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.824441214720628e-07, | |
| "logits/chosen": -0.3886776864528656, | |
| "logits/rejected": -0.3209950029850006, | |
| "logps/chosen": -294.50701904296875, | |
| "logps/rejected": -323.2568054199219, | |
| "loss": 0.4675, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -2.5119338035583496, | |
| "rewards/confidence": -0.21167421340942383, | |
| "rewards/confidence_mean_diff": 0.21167421340942383, | |
| "rewards/confidence_moving_diff": 0.0023379321210086346, | |
| "rewards/margins": 2.0011048316955566, | |
| "rewards/mix_margin": 2.0011048316955566, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -4.51303768157959, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.797838348138086e-07, | |
| "logits/chosen": -0.4468967914581299, | |
| "logits/rejected": -0.37465429306030273, | |
| "logps/chosen": -292.5479431152344, | |
| "logps/rejected": -336.9075927734375, | |
| "loss": 0.4486, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.818824052810669, | |
| "rewards/confidence": -0.45908528566360474, | |
| "rewards/confidence_mean_diff": 0.45908528566360474, | |
| "rewards/confidence_moving_diff": 0.0033530525397509336, | |
| "rewards/margins": 1.8966922760009766, | |
| "rewards/mix_margin": 1.8966926336288452, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -5.715516090393066, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.769443696332272e-07, | |
| "logits/chosen": -0.4200224280357361, | |
| "logits/rejected": -0.3721178472042084, | |
| "logps/chosen": -294.15472412109375, | |
| "logps/rejected": -325.35491943359375, | |
| "loss": 0.4818, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.8975882530212402, | |
| "rewards/confidence": -1.0875123739242554, | |
| "rewards/confidence_mean_diff": 1.0875123739242554, | |
| "rewards/confidence_moving_diff": 0.007382317446172237, | |
| "rewards/margins": 1.5224123001098633, | |
| "rewards/mix_margin": 1.5224121809005737, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -4.4200005531311035, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 4.739279400598532e-07, | |
| "logits/chosen": -0.4379913806915283, | |
| "logits/rejected": -0.40653958916664124, | |
| "logps/chosen": -265.93365478515625, | |
| "logps/rejected": -302.0426025390625, | |
| "loss": 0.4412, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.067883014678955, | |
| "rewards/confidence": -0.7120405435562134, | |
| "rewards/confidence_mean_diff": 0.7120405435562134, | |
| "rewards/confidence_moving_diff": -0.008706340566277504, | |
| "rewards/margins": 1.782560110092163, | |
| "rewards/mix_margin": 1.7825605869293213, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -3.8504433631896973, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.707368982147317e-07, | |
| "logits/chosen": -0.4465025067329407, | |
| "logits/rejected": -0.3766050338745117, | |
| "logps/chosen": -290.00079345703125, | |
| "logps/rejected": -336.4072265625, | |
| "loss": 0.4531, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -4.083464622497559, | |
| "rewards/confidence": -0.584677517414093, | |
| "rewards/confidence_mean_diff": 0.584677517414093, | |
| "rewards/confidence_moving_diff": 0.0012303728144615889, | |
| "rewards/margins": 1.8622348308563232, | |
| "rewards/mix_margin": 1.862234354019165, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -5.945699691772461, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.6737373237630473e-07, | |
| "logits/chosen": -0.46789780259132385, | |
| "logits/rejected": -0.42397865653038025, | |
| "logps/chosen": -298.47894287109375, | |
| "logps/rejected": -318.4173583984375, | |
| "loss": 0.4629, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -3.485027313232422, | |
| "rewards/confidence": -0.6979893445968628, | |
| "rewards/confidence_mean_diff": 0.6979893445968628, | |
| "rewards/confidence_moving_diff": -0.0007962509989738464, | |
| "rewards/margins": 1.487161636352539, | |
| "rewards/mix_margin": 1.4871612787246704, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -4.972188472747803, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.6384106504012665e-07, | |
| "logits/chosen": -0.342430055141449, | |
| "logits/rejected": -0.3011329770088196, | |
| "logps/chosen": -319.3477783203125, | |
| "logps/rejected": -361.84100341796875, | |
| "loss": 0.4204, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -4.075822830200195, | |
| "rewards/confidence": -0.4167357087135315, | |
| "rewards/confidence_mean_diff": 0.4167357087135315, | |
| "rewards/confidence_moving_diff": 0.004710749723017216, | |
| "rewards/margins": 1.914832353591919, | |
| "rewards/mix_margin": 1.9148324728012085, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -5.990654945373535, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.6014165087392105e-07, | |
| "logits/chosen": -0.31385287642478943, | |
| "logits/rejected": -0.2901327908039093, | |
| "logps/chosen": -314.6577453613281, | |
| "logps/rejected": -365.28314208984375, | |
| "loss": 0.468, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -4.32960319519043, | |
| "rewards/confidence": -0.7949329614639282, | |
| "rewards/confidence_mean_diff": 0.7949329614639282, | |
| "rewards/confidence_moving_diff": -0.002688751323148608, | |
| "rewards/margins": 2.037389039993286, | |
| "rewards/mix_margin": 2.0373892784118652, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -6.366991996765137, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.5627837456957374e-07, | |
| "logits/chosen": -0.521568238735199, | |
| "logits/rejected": -0.5004194378852844, | |
| "logps/chosen": -288.70245361328125, | |
| "logps/rejected": -321.8418273925781, | |
| "loss": 0.3893, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.327012300491333, | |
| "rewards/confidence": -0.5302469730377197, | |
| "rewards/confidence_mean_diff": 0.5302469730377197, | |
| "rewards/confidence_moving_diff": -0.001034903572872281, | |
| "rewards/margins": 1.743878722190857, | |
| "rewards/mix_margin": 1.743878960609436, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -5.070891380310059, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 4.5225424859373684e-07, | |
| "logits/chosen": -0.5100721716880798, | |
| "logits/rejected": -0.4984208047389984, | |
| "logps/chosen": -283.9259338378906, | |
| "logps/rejected": -315.3064270019531, | |
| "loss": 0.4078, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.368438243865967, | |
| "rewards/confidence": -0.48844680190086365, | |
| "rewards/confidence_mean_diff": 0.48844680190086365, | |
| "rewards/confidence_moving_diff": -0.001804995583370328, | |
| "rewards/margins": 1.6539949178695679, | |
| "rewards/mix_margin": 1.653995156288147, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -5.022432804107666, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.4807241083879764e-07, | |
| "logits/chosen": -0.5656698942184448, | |
| "logits/rejected": -0.5055958032608032, | |
| "logps/chosen": -309.11492919921875, | |
| "logps/rejected": -334.089599609375, | |
| "loss": 0.4642, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -3.7679576873779297, | |
| "rewards/confidence": -0.7686803936958313, | |
| "rewards/confidence_mean_diff": 0.7686803936958313, | |
| "rewards/confidence_moving_diff": 0.004600618965923786, | |
| "rewards/margins": 1.6603130102157593, | |
| "rewards/mix_margin": 1.6603130102157593, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -5.4282708168029785, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.437361221760449e-07, | |
| "logits/chosen": -0.5334455966949463, | |
| "logits/rejected": -0.4963143467903137, | |
| "logps/chosen": -320.7303161621094, | |
| "logps/rejected": -356.6766052246094, | |
| "loss": 0.3716, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -3.9415154457092285, | |
| "rewards/confidence": -0.6460387110710144, | |
| "rewards/confidence_mean_diff": 0.6460387110710144, | |
| "rewards/confidence_moving_diff": -0.003295689821243286, | |
| "rewards/margins": 1.6374887228012085, | |
| "rewards/mix_margin": 1.6374889612197876, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -5.579003810882568, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.392487639129391e-07, | |
| "logits/chosen": -0.45130062103271484, | |
| "logits/rejected": -0.41561928391456604, | |
| "logps/chosen": -292.44219970703125, | |
| "logps/rejected": -326.08740234375, | |
| "loss": 0.4046, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -3.6586639881134033, | |
| "rewards/confidence": -0.7409017086029053, | |
| "rewards/confidence_mean_diff": 0.7409017086029053, | |
| "rewards/confidence_moving_diff": -0.004873313941061497, | |
| "rewards/margins": 2.0624423027038574, | |
| "rewards/mix_margin": 2.0624423027038574, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -5.72110652923584, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 4.34613835156471e-07, | |
| "logits/chosen": -0.4289465844631195, | |
| "logits/rejected": -0.370225191116333, | |
| "logps/chosen": -301.20916748046875, | |
| "logps/rejected": -353.47808837890625, | |
| "loss": 0.4369, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -3.5139365196228027, | |
| "rewards/confidence": 0.1294311136007309, | |
| "rewards/confidence_mean_diff": -0.1294311136007309, | |
| "rewards/confidence_moving_diff": 0.0018305673729628325, | |
| "rewards/margins": 2.4083054065704346, | |
| "rewards/mix_margin": 2.4083054065704346, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -5.922241687774658, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.2983495008466273e-07, | |
| "logits/chosen": -0.5299749970436096, | |
| "logits/rejected": -0.4628564715385437, | |
| "logps/chosen": -304.3265380859375, | |
| "logps/rejected": -341.41021728515625, | |
| "loss": 0.4896, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -3.638075590133667, | |
| "rewards/confidence": -0.6767855882644653, | |
| "rewards/confidence_mean_diff": 0.6767855882644653, | |
| "rewards/confidence_moving_diff": 0.00973493605852127, | |
| "rewards/margins": 2.2702558040618896, | |
| "rewards/mix_margin": 2.2702555656433105, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -5.908331871032715, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 4.249158351283413e-07, | |
| "logits/chosen": -0.4159880578517914, | |
| "logits/rejected": -0.37791362404823303, | |
| "logps/chosen": -294.2314758300781, | |
| "logps/rejected": -341.30267333984375, | |
| "loss": 0.3398, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -3.7123889923095703, | |
| "rewards/confidence": -0.6010817289352417, | |
| "rewards/confidence_mean_diff": 0.6010817289352417, | |
| "rewards/confidence_moving_diff": -0.008823203854262829, | |
| "rewards/margins": 1.9515806436538696, | |
| "rewards/mix_margin": 1.951580286026001, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -5.663969993591309, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.1986032606537916e-07, | |
| "logits/chosen": -0.47558921575546265, | |
| "logits/rejected": -0.4039694368839264, | |
| "logps/chosen": -311.9045104980469, | |
| "logps/rejected": -346.9130554199219, | |
| "loss": 0.4296, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -4.128937721252441, | |
| "rewards/confidence": -0.6023374199867249, | |
| "rewards/confidence_mean_diff": 0.6023374199867249, | |
| "rewards/confidence_moving_diff": 0.006594707258045673, | |
| "rewards/margins": 1.7804996967315674, | |
| "rewards/mix_margin": 1.7804996967315674, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -5.90943717956543, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.146723650296701e-07, | |
| "logits/chosen": -0.46674466133117676, | |
| "logits/rejected": -0.4111348092556, | |
| "logps/chosen": -311.08245849609375, | |
| "logps/rejected": -346.9845275878906, | |
| "loss": 0.4086, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -5.290271282196045, | |
| "rewards/confidence": -0.7440476417541504, | |
| "rewards/confidence_mean_diff": 0.7440476417541504, | |
| "rewards/confidence_moving_diff": -0.0024932101368904114, | |
| "rewards/margins": 2.087628126144409, | |
| "rewards/mix_margin": 2.087628126144409, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -7.377900123596191, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.0935599743717244e-07, | |
| "logits/chosen": -0.44167813658714294, | |
| "logits/rejected": -0.42356547713279724, | |
| "logps/chosen": -298.292724609375, | |
| "logps/rejected": -332.68048095703125, | |
| "loss": 0.3877, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -3.863131284713745, | |
| "rewards/confidence": -0.3811723589897156, | |
| "rewards/confidence_mean_diff": 0.3811723589897156, | |
| "rewards/confidence_moving_diff": -0.004648227244615555, | |
| "rewards/margins": 1.9162908792495728, | |
| "rewards/mix_margin": 1.9162914752960205, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -5.779421806335449, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.039153688314145e-07, | |
| "logits/chosen": -0.516374945640564, | |
| "logits/rejected": -0.4751940667629242, | |
| "logps/chosen": -303.4271545410156, | |
| "logps/rejected": -347.4010314941406, | |
| "loss": 0.498, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -4.866153717041016, | |
| "rewards/confidence": -0.6267096996307373, | |
| "rewards/confidence_mean_diff": 0.6267096996307373, | |
| "rewards/confidence_moving_diff": 0.006585358642041683, | |
| "rewards/margins": 1.7950010299682617, | |
| "rewards/mix_margin": 1.7950012683868408, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -6.661154270172119, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.983547216509254e-07, | |
| "logits/chosen": -0.40662088990211487, | |
| "logits/rejected": -0.38719242811203003, | |
| "logps/chosen": -308.4295959472656, | |
| "logps/rejected": -354.93035888671875, | |
| "loss": 0.3278, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -5.220864295959473, | |
| "rewards/confidence": -0.08256378769874573, | |
| "rewards/confidence_mean_diff": 0.08256378769874573, | |
| "rewards/confidence_moving_diff": -0.015401296317577362, | |
| "rewards/margins": 2.610985279083252, | |
| "rewards/mix_margin": 2.610985279083252, | |
| "rewards/real_percentage": 11.824999809265137, | |
| "rewards/rejected": -7.831849575042725, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.9267839192110797e-07, | |
| "logits/chosen": -0.39550715684890747, | |
| "logits/rejected": -0.39764124155044556, | |
| "logps/chosen": -319.3674011230469, | |
| "logps/rejected": -351.965087890625, | |
| "loss": 0.3207, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -4.867480278015137, | |
| "rewards/confidence": -0.0962248146533966, | |
| "rewards/confidence_mean_diff": 0.0962248146533966, | |
| "rewards/confidence_moving_diff": 0.0056266020983457565, | |
| "rewards/margins": 2.221008777618408, | |
| "rewards/mix_margin": 2.221008777618408, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -7.088489532470703, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 3.8689080587313755e-07, | |
| "logits/chosen": -0.41592779755592346, | |
| "logits/rejected": -0.3347400724887848, | |
| "logps/chosen": -334.25689697265625, | |
| "logps/rejected": -397.85546875, | |
| "loss": 0.4116, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -7.198552131652832, | |
| "rewards/confidence": -0.2864631712436676, | |
| "rewards/confidence_mean_diff": 0.2864631712436676, | |
| "rewards/confidence_moving_diff": 0.006835117936134338, | |
| "rewards/margins": 2.8038251399993896, | |
| "rewards/mix_margin": 2.8038249015808105, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -10.002375602722168, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.809964764925198e-07, | |
| "logits/chosen": -0.4559716284275055, | |
| "logits/rejected": -0.42797666788101196, | |
| "logps/chosen": -307.8606262207031, | |
| "logps/rejected": -338.77020263671875, | |
| "loss": 0.4022, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -5.137407302856445, | |
| "rewards/confidence": -0.9358379244804382, | |
| "rewards/confidence_mean_diff": 0.9358379244804382, | |
| "rewards/confidence_moving_diff": 0.0033534616231918335, | |
| "rewards/margins": 2.1309828758239746, | |
| "rewards/mix_margin": 2.1309828758239746, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -7.268389701843262, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.75e-07, | |
| "logits/chosen": -0.37641018629074097, | |
| "logits/rejected": -0.3297973573207855, | |
| "logps/chosen": -314.38323974609375, | |
| "logps/rejected": -340.14453125, | |
| "loss": 0.5429, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -4.714179992675781, | |
| "rewards/confidence": -1.3001186847686768, | |
| "rewards/confidence_mean_diff": 1.3001186847686768, | |
| "rewards/confidence_moving_diff": 0.009369373321533203, | |
| "rewards/margins": 1.974973440170288, | |
| "rewards/mix_margin": 1.9749730825424194, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -6.689154148101807, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.689060522675688e-07, | |
| "logits/chosen": -0.5324608087539673, | |
| "logits/rejected": -0.5055150985717773, | |
| "logps/chosen": -303.7776184082031, | |
| "logps/rejected": -342.0851135253906, | |
| "loss": 0.4612, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -3.9132919311523438, | |
| "rewards/confidence": -1.1798120737075806, | |
| "rewards/confidence_mean_diff": 1.1798120737075806, | |
| "rewards/confidence_moving_diff": -0.013687675818800926, | |
| "rewards/margins": 1.8471992015838623, | |
| "rewards/mix_margin": 1.8471992015838623, | |
| "rewards/real_percentage": 11.850000381469727, | |
| "rewards/rejected": -5.760490894317627, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.6271938517235765e-07, | |
| "logits/chosen": -0.4572451114654541, | |
| "logits/rejected": -0.4341673254966736, | |
| "logps/chosen": -335.23284912109375, | |
| "logps/rejected": -379.40472412109375, | |
| "loss": 0.5129, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -7.311725616455078, | |
| "rewards/confidence": -0.7189286351203918, | |
| "rewards/confidence_mean_diff": 0.7189286351203918, | |
| "rewards/confidence_moving_diff": 0.005505113396793604, | |
| "rewards/margins": 2.456300735473633, | |
| "rewards/mix_margin": 2.456300735473633, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -9.768027305603027, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.5644482289126813e-07, | |
| "logits/chosen": -0.36072584986686707, | |
| "logits/rejected": -0.3171803057193756, | |
| "logps/chosen": -313.186279296875, | |
| "logps/rejected": -355.65985107421875, | |
| "loss": 0.4266, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.0088701248168945, | |
| "rewards/confidence": -0.6339288353919983, | |
| "rewards/confidence_mean_diff": 0.6339288353919983, | |
| "rewards/confidence_moving_diff": -0.00874006375670433, | |
| "rewards/margins": 2.482173442840576, | |
| "rewards/mix_margin": 2.482173442840576, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -7.491044521331787, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.500872581392238e-07, | |
| "logits/chosen": -0.4469270706176758, | |
| "logits/rejected": -0.37663811445236206, | |
| "logps/chosen": -291.22613525390625, | |
| "logps/rejected": -333.7605895996094, | |
| "loss": 0.4558, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -4.05816125869751, | |
| "rewards/confidence": -1.033684492111206, | |
| "rewards/confidence_mean_diff": 1.033684492111206, | |
| "rewards/confidence_moving_diff": -0.0015418336261063814, | |
| "rewards/margins": 1.87485671043396, | |
| "rewards/mix_margin": 1.8748573064804077, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -5.933018207550049, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 3.43651648353978e-07, | |
| "logits/chosen": -0.4613402485847473, | |
| "logits/rejected": -0.4409905970096588, | |
| "logps/chosen": -304.0325622558594, | |
| "logps/rejected": -330.51800537109375, | |
| "loss": 0.5126, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -5.624220371246338, | |
| "rewards/confidence": -0.5434136390686035, | |
| "rewards/confidence_mean_diff": 0.5434136390686035, | |
| "rewards/confidence_moving_diff": 0.0064627365209162235, | |
| "rewards/margins": 1.42355215549469, | |
| "rewards/mix_margin": 1.42355215549469, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -7.047772407531738, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.371430118304538e-07, | |
| "logits/chosen": -0.45843037962913513, | |
| "logits/rejected": -0.3802551329135895, | |
| "logps/chosen": -307.3968811035156, | |
| "logps/rejected": -348.0972900390625, | |
| "loss": 0.4577, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -5.370692253112793, | |
| "rewards/confidence": -0.5198456048965454, | |
| "rewards/confidence_mean_diff": 0.5198456048965454, | |
| "rewards/confidence_moving_diff": 0.0017864800756797194, | |
| "rewards/margins": 2.2920944690704346, | |
| "rewards/mix_margin": 2.2920944690704346, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -7.662786960601807, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.305664238076278e-07, | |
| "logits/chosen": -0.5105314254760742, | |
| "logits/rejected": -0.4601234793663025, | |
| "logps/chosen": -305.40435791015625, | |
| "logps/rejected": -345.4449768066406, | |
| "loss": 0.3572, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.065144062042236, | |
| "rewards/confidence": -0.6823070049285889, | |
| "rewards/confidence_mean_diff": 0.6823070049285889, | |
| "rewards/confidence_moving_diff": -0.002456407295539975, | |
| "rewards/margins": 1.7983287572860718, | |
| "rewards/mix_margin": 1.7983287572860718, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -6.863473415374756, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.2392701251101167e-07, | |
| "logits/chosen": -0.4799830913543701, | |
| "logits/rejected": -0.4452899098396301, | |
| "logps/chosen": -292.88165283203125, | |
| "logps/rejected": -330.19061279296875, | |
| "loss": 0.3728, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -4.039118766784668, | |
| "rewards/confidence": -0.4461361765861511, | |
| "rewards/confidence_mean_diff": 0.4461361765861511, | |
| "rewards/confidence_moving_diff": 0.005477400030940771, | |
| "rewards/margins": 2.1907596588134766, | |
| "rewards/mix_margin": 2.1907596588134766, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -6.2298784255981445, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 3.172299551538164e-07, | |
| "logits/chosen": -0.38940539956092834, | |
| "logits/rejected": -0.38924863934516907, | |
| "logps/chosen": -300.20208740234375, | |
| "logps/rejected": -347.2121887207031, | |
| "loss": 0.362, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -4.930103302001953, | |
| "rewards/confidence": -0.6476409435272217, | |
| "rewards/confidence_mean_diff": 0.6476409435272217, | |
| "rewards/confidence_moving_diff": -0.008157559670507908, | |
| "rewards/margins": 1.8417211771011353, | |
| "rewards/mix_margin": 1.8417211771011353, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": -6.771825313568115, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.104804738999169e-07, | |
| "logits/chosen": -0.4376412332057953, | |
| "logits/rejected": -0.3436052203178406, | |
| "logps/chosen": -323.06829833984375, | |
| "logps/rejected": -379.4293212890625, | |
| "loss": 0.3471, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.260269641876221, | |
| "rewards/confidence": -0.220208078622818, | |
| "rewards/confidence_mean_diff": 0.220208078622818, | |
| "rewards/confidence_moving_diff": -0.0015338037628680468, | |
| "rewards/margins": 2.5263373851776123, | |
| "rewards/mix_margin": 2.5263376235961914, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -7.786606788635254, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 3.036838317917658e-07, | |
| "logits/chosen": -0.5207220911979675, | |
| "logits/rejected": -0.48633185029029846, | |
| "logps/chosen": -298.81634521484375, | |
| "logps/rejected": -343.13702392578125, | |
| "loss": 0.3951, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -4.951300621032715, | |
| "rewards/confidence": -0.6289063692092896, | |
| "rewards/confidence_mean_diff": 0.6289063692092896, | |
| "rewards/confidence_moving_diff": 0.00528533011674881, | |
| "rewards/margins": 2.1019580364227295, | |
| "rewards/mix_margin": 2.101957321166992, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -7.053258419036865, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.968453286464312e-07, | |
| "logits/chosen": -0.3645554184913635, | |
| "logits/rejected": -0.3365468978881836, | |
| "logps/chosen": -319.878173828125, | |
| "logps/rejected": -350.56060791015625, | |
| "loss": 0.3448, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -4.763045310974121, | |
| "rewards/confidence": -0.661165714263916, | |
| "rewards/confidence_mean_diff": 0.661165714263916, | |
| "rewards/confidence_moving_diff": -0.0007452588761225343, | |
| "rewards/margins": 2.119603157043457, | |
| "rewards/mix_margin": 2.119602680206299, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -6.8826494216918945, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.899702969229587e-07, | |
| "logits/chosen": -0.39751607179641724, | |
| "logits/rejected": -0.3644389808177948, | |
| "logps/chosen": -327.7417907714844, | |
| "logps/rejected": -360.3294982910156, | |
| "loss": 0.4697, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -5.276289939880371, | |
| "rewards/confidence": -0.9160386323928833, | |
| "rewards/confidence_mean_diff": 0.9160386323928833, | |
| "rewards/confidence_moving_diff": 0.00511467969045043, | |
| "rewards/margins": 2.0439298152923584, | |
| "rewards/mix_margin": 2.0439295768737793, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -7.32021951675415, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.830640975642806e-07, | |
| "logits/chosen": -0.39188045263290405, | |
| "logits/rejected": -0.37263667583465576, | |
| "logps/chosen": -320.3612365722656, | |
| "logps/rejected": -370.76287841796875, | |
| "loss": 0.3592, | |
| "rewards/accuracies": 0.862500011920929, | |
| "rewards/chosen": -4.933075904846191, | |
| "rewards/confidence": -0.4961981773376465, | |
| "rewards/confidence_mean_diff": 0.4961981773376465, | |
| "rewards/confidence_moving_diff": -0.018065880984067917, | |
| "rewards/margins": 3.3044047355651855, | |
| "rewards/mix_margin": 3.3044047355651855, | |
| "rewards/real_percentage": 11.774999618530273, | |
| "rewards/rejected": -8.237480163574219, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.761321158169134e-07, | |
| "logits/chosen": -0.28580939769744873, | |
| "logits/rejected": -0.2600021958351135, | |
| "logps/chosen": -322.76971435546875, | |
| "logps/rejected": -359.5664367675781, | |
| "loss": 0.3503, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -4.917359352111816, | |
| "rewards/confidence": -0.01223917305469513, | |
| "rewards/confidence_mean_diff": 0.01223917305469513, | |
| "rewards/confidence_moving_diff": 0.012059726752340794, | |
| "rewards/margins": 2.2575831413269043, | |
| "rewards/mix_margin": 2.2575831413269043, | |
| "rewards/real_percentage": 12.199999809265137, | |
| "rewards/rejected": -7.174942970275879, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.6917975703170465e-07, | |
| "logits/chosen": -0.5514649748802185, | |
| "logits/rejected": -0.475651353597641, | |
| "logps/chosen": -302.81915283203125, | |
| "logps/rejected": -340.8907470703125, | |
| "loss": 0.344, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -4.576534271240234, | |
| "rewards/confidence": -0.41131219267845154, | |
| "rewards/confidence_mean_diff": 0.41131219267845154, | |
| "rewards/confidence_moving_diff": 0.0009659202769398689, | |
| "rewards/margins": 2.8126635551452637, | |
| "rewards/mix_margin": 2.8126635551452637, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -7.38919734954834, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.6221244244890336e-07, | |
| "logits/chosen": -0.39924511313438416, | |
| "logits/rejected": -0.3650361895561218, | |
| "logps/chosen": -319.9598083496094, | |
| "logps/rejected": -355.4996032714844, | |
| "loss": 0.3447, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -5.445952415466309, | |
| "rewards/confidence": -0.4980873167514801, | |
| "rewards/confidence_mean_diff": 0.4980873167514801, | |
| "rewards/confidence_moving_diff": -0.006821976043283939, | |
| "rewards/margins": 2.2038683891296387, | |
| "rewards/mix_margin": 2.2038686275482178, | |
| "rewards/real_percentage": 11.824999809265137, | |
| "rewards/rejected": -7.649819850921631, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 2.5523560497083924e-07, | |
| "logits/chosen": -0.520348072052002, | |
| "logits/rejected": -0.45424094796180725, | |
| "logps/chosen": -301.77264404296875, | |
| "logps/rejected": -348.21533203125, | |
| "loss": 0.3728, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.255308628082275, | |
| "rewards/confidence": -0.1299140751361847, | |
| "rewards/confidence_mean_diff": 0.1299140751361847, | |
| "rewards/confidence_moving_diff": 0.0017577748512849212, | |
| "rewards/margins": 2.9795408248901367, | |
| "rewards/mix_margin": 2.9795405864715576, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -8.234848976135254, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.482546849255096e-07, | |
| "logits/chosen": -0.47593337297439575, | |
| "logits/rejected": -0.43128928542137146, | |
| "logps/chosen": -308.0284118652344, | |
| "logps/rejected": -356.2323913574219, | |
| "loss": 0.3804, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -5.071780204772949, | |
| "rewards/confidence": -0.1557270735502243, | |
| "rewards/confidence_mean_diff": 0.1557270735502243, | |
| "rewards/confidence_moving_diff": -0.0029389006085693836, | |
| "rewards/margins": 3.049848794937134, | |
| "rewards/mix_margin": 3.049848794937134, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -8.12162971496582, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.412751258243748e-07, | |
| "logits/chosen": -0.5331052541732788, | |
| "logits/rejected": -0.45946574211120605, | |
| "logps/chosen": -295.99664306640625, | |
| "logps/rejected": -342.9468078613281, | |
| "loss": 0.3445, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.204689979553223, | |
| "rewards/confidence": 0.04942571744322777, | |
| "rewards/confidence_mean_diff": -0.04942571744322777, | |
| "rewards/confidence_moving_diff": -0.0032533840276300907, | |
| "rewards/margins": 3.234083652496338, | |
| "rewards/mix_margin": 3.2340831756591797, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -7.438773155212402, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.3430237011767164e-07, | |
| "logits/chosen": -0.553652286529541, | |
| "logits/rejected": -0.48139438033103943, | |
| "logps/chosen": -323.6736755371094, | |
| "logps/rejected": -363.87115478515625, | |
| "loss": 0.3215, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -6.37005090713501, | |
| "rewards/confidence": -0.150817409157753, | |
| "rewards/confidence_mean_diff": 0.150817409157753, | |
| "rewards/confidence_moving_diff": 0.0038863657973706722, | |
| "rewards/margins": 2.2579023838043213, | |
| "rewards/mix_margin": 2.257902145385742, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -8.627952575683594, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2734185495055498e-07, | |
| "logits/chosen": -0.5193800330162048, | |
| "logits/rejected": -0.46058887243270874, | |
| "logps/chosen": -323.987060546875, | |
| "logps/rejected": -374.1817932128906, | |
| "loss": 0.3892, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -6.0654520988464355, | |
| "rewards/confidence": -0.4744696021080017, | |
| "rewards/confidence_mean_diff": 0.4744696021080017, | |
| "rewards/confidence_moving_diff": 0.013912144117057323, | |
| "rewards/margins": 1.95986008644104, | |
| "rewards/mix_margin": 1.95986008644104, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -8.025312423706055, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.2039900792337474e-07, | |
| "logits/chosen": -0.4347326159477234, | |
| "logits/rejected": -0.39321279525756836, | |
| "logps/chosen": -298.956298828125, | |
| "logps/rejected": -342.5967712402344, | |
| "loss": 0.4045, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.106101036071777, | |
| "rewards/confidence": -0.6167746186256409, | |
| "rewards/confidence_mean_diff": 0.6167746186256409, | |
| "rewards/confidence_moving_diff": -0.012726289220154285, | |
| "rewards/margins": 2.3975090980529785, | |
| "rewards/mix_margin": 2.3975090980529785, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": -7.503609657287598, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 2.1347924285939712e-07, | |
| "logits/chosen": -0.44474563002586365, | |
| "logits/rejected": -0.3617003560066223, | |
| "logps/chosen": -298.63287353515625, | |
| "logps/rejected": -361.87591552734375, | |
| "loss": 0.3331, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.2312774658203125, | |
| "rewards/confidence": -0.16749832034111023, | |
| "rewards/confidence_mean_diff": 0.16749832034111023, | |
| "rewards/confidence_moving_diff": -0.0019406508654356003, | |
| "rewards/margins": 3.5485405921936035, | |
| "rewards/mix_margin": 3.5485405921936035, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -8.779818534851074, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.065879555832674e-07, | |
| "logits/chosen": -0.5316728949546814, | |
| "logits/rejected": -0.5006137490272522, | |
| "logps/chosen": -323.6309509277344, | |
| "logps/rejected": -360.2073669433594, | |
| "loss": 0.4018, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -5.480199337005615, | |
| "rewards/confidence": -0.6847819089889526, | |
| "rewards/confidence_mean_diff": 0.6847819089889526, | |
| "rewards/confidence_moving_diff": 0.015784386545419693, | |
| "rewards/margins": 2.00040340423584, | |
| "rewards/mix_margin": 2.00040340423584, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -7.480602264404297, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9973051971350888e-07, | |
| "logits/chosen": -0.533614993095398, | |
| "logits/rejected": -0.5051816701889038, | |
| "logps/chosen": -326.9102478027344, | |
| "logps/rejected": -361.22027587890625, | |
| "loss": 0.3655, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.565978050231934, | |
| "rewards/confidence": -0.7230534553527832, | |
| "rewards/confidence_mean_diff": 0.7230534553527832, | |
| "rewards/confidence_moving_diff": -0.014468440786004066, | |
| "rewards/margins": 2.4039742946624756, | |
| "rewards/mix_margin": 2.4039740562438965, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -7.969951629638672, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.9291228247233603e-07, | |
| "logits/chosen": -0.3912977874279022, | |
| "logits/rejected": -0.3417341411113739, | |
| "logps/chosen": -323.6977844238281, | |
| "logps/rejected": -366.582763671875, | |
| "loss": 0.4175, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.948014259338379, | |
| "rewards/confidence": -0.25725242495536804, | |
| "rewards/confidence_mean_diff": 0.25725242495536804, | |
| "rewards/confidence_moving_diff": 0.013772556558251381, | |
| "rewards/margins": 2.5766825675964355, | |
| "rewards/mix_margin": 2.5766825675964355, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -8.524697303771973, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.861385605160524e-07, | |
| "logits/chosen": -0.4154201149940491, | |
| "logits/rejected": -0.4020150601863861, | |
| "logps/chosen": -311.03448486328125, | |
| "logps/rejected": -354.7017822265625, | |
| "loss": 0.361, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -6.06185245513916, | |
| "rewards/confidence": -0.8854671716690063, | |
| "rewards/confidence_mean_diff": 0.8854671716690063, | |
| "rewards/confidence_moving_diff": -0.009208135306835175, | |
| "rewards/margins": 2.3469111919403076, | |
| "rewards/mix_margin": 2.346911668777466, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -8.408763885498047, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.7941463578928083e-07, | |
| "logits/chosen": -0.4222695231437683, | |
| "logits/rejected": -0.3983038365840912, | |
| "logps/chosen": -319.29034423828125, | |
| "logps/rejected": -351.5157775878906, | |
| "loss": 0.3587, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -5.011105537414551, | |
| "rewards/confidence": -0.3240852952003479, | |
| "rewards/confidence_mean_diff": 0.3240852952003479, | |
| "rewards/confidence_moving_diff": -0.00017112810746766627, | |
| "rewards/margins": 2.106672763824463, | |
| "rewards/mix_margin": 2.106672763824463, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -7.1177778244018555, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.7274575140626315e-07, | |
| "logits/chosen": -0.4527736306190491, | |
| "logits/rejected": -0.435302197933197, | |
| "logps/chosen": -306.79571533203125, | |
| "logps/rejected": -344.436767578125, | |
| "loss": 0.3299, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -5.748501300811768, | |
| "rewards/confidence": -0.08212902396917343, | |
| "rewards/confidence_mean_diff": 0.08212902396917343, | |
| "rewards/confidence_moving_diff": -0.008926677517592907, | |
| "rewards/margins": 2.3505711555480957, | |
| "rewards/mix_margin": 2.3505709171295166, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -8.099072456359863, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.6613710756243627e-07, | |
| "logits/chosen": -0.44862475991249084, | |
| "logits/rejected": -0.4289964735507965, | |
| "logps/chosen": -294.82293701171875, | |
| "logps/rejected": -337.2476501464844, | |
| "loss": 0.3643, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -4.9694952964782715, | |
| "rewards/confidence": -0.6442267298698425, | |
| "rewards/confidence_mean_diff": 0.6442267298698425, | |
| "rewards/confidence_moving_diff": 0.01766865886747837, | |
| "rewards/margins": 2.3822617530822754, | |
| "rewards/mix_margin": 2.382261276245117, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -7.351757049560547, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.5959385747947695e-07, | |
| "logits/chosen": -0.35890644788742065, | |
| "logits/rejected": -0.349406898021698, | |
| "logps/chosen": -325.7987365722656, | |
| "logps/rejected": -362.1269836425781, | |
| "loss": 0.3559, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -5.5250244140625, | |
| "rewards/confidence": -0.2770170569419861, | |
| "rewards/confidence_mean_diff": 0.2770170569419861, | |
| "rewards/confidence_moving_diff": -0.01518084667623043, | |
| "rewards/margins": 2.282301187515259, | |
| "rewards/mix_margin": 2.2823004722595215, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -7.8073248863220215, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.5312110338697427e-07, | |
| "logits/chosen": -0.4841701090335846, | |
| "logits/rejected": -0.4870760440826416, | |
| "logps/chosen": -315.62542724609375, | |
| "logps/rejected": -368.21038818359375, | |
| "loss": 0.3951, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.752788543701172, | |
| "rewards/confidence": -0.34614765644073486, | |
| "rewards/confidence_mean_diff": 0.34614765644073486, | |
| "rewards/confidence_moving_diff": 4.3201445805607364e-05, | |
| "rewards/margins": 2.3943512439727783, | |
| "rewards/mix_margin": 2.394350528717041, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -8.147139549255371, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.4672389254386457e-07, | |
| "logits/chosen": -0.5643536448478699, | |
| "logits/rejected": -0.4983779489994049, | |
| "logps/chosen": -304.97998046875, | |
| "logps/rejected": -360.8329772949219, | |
| "loss": 0.3309, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": -5.896836280822754, | |
| "rewards/confidence": 0.3215560019016266, | |
| "rewards/confidence_mean_diff": -0.3215560019016266, | |
| "rewards/confidence_moving_diff": 0.0016240788390859962, | |
| "rewards/margins": 2.7777109146118164, | |
| "rewards/mix_margin": 2.7777106761932373, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -8.67454719543457, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.404072133027306e-07, | |
| "logits/chosen": -0.5274654030799866, | |
| "logits/rejected": -0.5008283853530884, | |
| "logps/chosen": -328.6963195800781, | |
| "logps/rejected": -365.78875732421875, | |
| "loss": 0.4045, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -6.452589988708496, | |
| "rewards/confidence": -0.3352366089820862, | |
| "rewards/confidence_mean_diff": 0.3352366089820862, | |
| "rewards/confidence_moving_diff": 0.014083778485655785, | |
| "rewards/margins": 2.0285284519195557, | |
| "rewards/mix_margin": 2.0285284519195557, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -8.481119155883789, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 1.341759912200346e-07, | |
| "logits/chosen": -0.4335179328918457, | |
| "logits/rejected": -0.4630247950553894, | |
| "logps/chosen": -319.6604309082031, | |
| "logps/rejected": -351.862548828125, | |
| "loss": 0.3775, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -5.491322994232178, | |
| "rewards/confidence": -0.9161689877510071, | |
| "rewards/confidence_mean_diff": 0.9161689877510071, | |
| "rewards/confidence_moving_diff": -0.011617189273238182, | |
| "rewards/margins": 2.3194267749786377, | |
| "rewards/mix_margin": 2.319427013397217, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -7.8107500076293945, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.2803508521531677e-07, | |
| "logits/chosen": -0.5323294401168823, | |
| "logits/rejected": -0.47418832778930664, | |
| "logps/chosen": -325.2956848144531, | |
| "logps/rejected": -360.1390075683594, | |
| "loss": 0.3776, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.377198219299316, | |
| "rewards/confidence": -0.4973165988922119, | |
| "rewards/confidence_mean_diff": 0.4973165988922119, | |
| "rewards/confidence_moving_diff": 0.005177941173315048, | |
| "rewards/margins": 2.249694347381592, | |
| "rewards/mix_margin": 2.2496941089630127, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -7.626893043518066, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.2198928378235715e-07, | |
| "logits/chosen": -0.5082569718360901, | |
| "logits/rejected": -0.45084744691848755, | |
| "logps/chosen": -320.08355712890625, | |
| "logps/rejected": -379.03448486328125, | |
| "loss": 0.3637, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.210715293884277, | |
| "rewards/confidence": -0.19475048780441284, | |
| "rewards/confidence_mean_diff": 0.19475048780441284, | |
| "rewards/confidence_moving_diff": -0.009198799729347229, | |
| "rewards/margins": 2.6074321269989014, | |
| "rewards/mix_margin": 2.6074321269989014, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -7.818148136138916, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.1604330125525078e-07, | |
| "logits/chosen": -0.3887682259082794, | |
| "logits/rejected": -0.35597771406173706, | |
| "logps/chosen": -331.58807373046875, | |
| "logps/rejected": -367.07220458984375, | |
| "loss": 0.3699, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.553780555725098, | |
| "rewards/confidence": -0.24257361888885498, | |
| "rewards/confidence_mean_diff": 0.24257361888885498, | |
| "rewards/confidence_moving_diff": 0.0054216100834310055, | |
| "rewards/margins": 2.749530792236328, | |
| "rewards/mix_margin": 2.7495312690734863, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -8.303311347961426, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.1020177413231332e-07, | |
| "logits/chosen": -0.532088041305542, | |
| "logits/rejected": -0.46247783303260803, | |
| "logps/chosen": -308.42864990234375, | |
| "logps/rejected": -362.3594970703125, | |
| "loss": 0.3448, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -5.45842170715332, | |
| "rewards/confidence": -0.279445618391037, | |
| "rewards/confidence_mean_diff": 0.279445618391037, | |
| "rewards/confidence_moving_diff": -0.007828864268958569, | |
| "rewards/margins": 2.8715789318084717, | |
| "rewards/mix_margin": 2.8715789318084717, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -8.330000877380371, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.0446925746067766e-07, | |
| "logits/chosen": -0.39833885431289673, | |
| "logits/rejected": -0.32208365201950073, | |
| "logps/chosen": -323.56463623046875, | |
| "logps/rejected": -375.4222717285156, | |
| "loss": 0.3684, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -5.711199760437012, | |
| "rewards/confidence": -0.6064328551292419, | |
| "rewards/confidence_mean_diff": 0.6064328551292419, | |
| "rewards/confidence_moving_diff": 0.008294849656522274, | |
| "rewards/margins": 2.7784066200256348, | |
| "rewards/mix_margin": 2.778406858444214, | |
| "rewards/real_percentage": 12.050000190734863, | |
| "rewards/rejected": -8.489605903625488, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.885022128440629e-08, | |
| "logits/chosen": -0.4562014639377594, | |
| "logits/rejected": -0.44077634811401367, | |
| "logps/chosen": -336.533447265625, | |
| "logps/rejected": -366.4312744140625, | |
| "loss": 0.4618, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -6.210871696472168, | |
| "rewards/confidence": -0.26110726594924927, | |
| "rewards/confidence_mean_diff": 0.26110726594924927, | |
| "rewards/confidence_moving_diff": -0.0007525529945269227, | |
| "rewards/margins": 2.1315207481384277, | |
| "rewards/mix_margin": 2.1315207481384277, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -8.34239387512207, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 9.334904715888494e-08, | |
| "logits/chosen": -0.4822518825531006, | |
| "logits/rejected": -0.47051340341567993, | |
| "logps/chosen": -314.5094299316406, | |
| "logps/rejected": -344.85235595703125, | |
| "loss": 0.3055, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.845602035522461, | |
| "rewards/confidence": -0.33256375789642334, | |
| "rewards/confidence_mean_diff": 0.33256375789642334, | |
| "rewards/confidence_moving_diff": -0.008665810339152813, | |
| "rewards/margins": 2.6286842823028564, | |
| "rewards/mix_margin": 2.6286845207214355, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -7.4742865562438965, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 8.797002473421727e-08, | |
| "logits/chosen": -0.41179442405700684, | |
| "logits/rejected": -0.406447172164917, | |
| "logps/chosen": -306.6711120605469, | |
| "logps/rejected": -349.2420959472656, | |
| "loss": 0.334, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.130354881286621, | |
| "rewards/confidence": -0.122930146753788, | |
| "rewards/confidence_mean_diff": 0.122930146753788, | |
| "rewards/confidence_moving_diff": 0.00898961815983057, | |
| "rewards/margins": 2.354593276977539, | |
| "rewards/mix_margin": 2.35459303855896, | |
| "rewards/real_percentage": 12.175000190734863, | |
| "rewards/rejected": -7.48494815826416, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 8.271734841028552e-08, | |
| "logits/chosen": -0.5643023252487183, | |
| "logits/rejected": -0.5298415422439575, | |
| "logps/chosen": -323.9385070800781, | |
| "logps/rejected": -373.266845703125, | |
| "loss": 0.3558, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -5.833059787750244, | |
| "rewards/confidence": -0.38230979442596436, | |
| "rewards/confidence_mean_diff": 0.38230979442596436, | |
| "rewards/confidence_moving_diff": 0.004303359426558018, | |
| "rewards/margins": 2.2788245677948, | |
| "rewards/mix_margin": 2.278825283050537, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -8.111884117126465, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.759511406608255e-08, | |
| "logits/chosen": -0.501059889793396, | |
| "logits/rejected": -0.46744924783706665, | |
| "logps/chosen": -308.55816650390625, | |
| "logps/rejected": -351.38226318359375, | |
| "loss": 0.3868, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -5.338001728057861, | |
| "rewards/confidence": -0.9279301762580872, | |
| "rewards/confidence_mean_diff": 0.9279301762580872, | |
| "rewards/confidence_moving_diff": 0.0030523687601089478, | |
| "rewards/margins": 2.3086838722229004, | |
| "rewards/mix_margin": 2.3086836338043213, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -7.646686553955078, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 7.260731586586982e-08, | |
| "logits/chosen": -0.43802008032798767, | |
| "logits/rejected": -0.4071362018585205, | |
| "logps/chosen": -322.073486328125, | |
| "logps/rejected": -366.12799072265625, | |
| "loss": 0.4159, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -6.1254730224609375, | |
| "rewards/confidence": -0.8133270144462585, | |
| "rewards/confidence_mean_diff": 0.8133270144462585, | |
| "rewards/confidence_moving_diff": -0.001786289387382567, | |
| "rewards/margins": 2.0772204399108887, | |
| "rewards/mix_margin": 2.0772206783294678, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -8.202692985534668, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.775784314464716e-08, | |
| "logits/chosen": -0.5343870520591736, | |
| "logits/rejected": -0.5283997654914856, | |
| "logps/chosen": -309.2534484863281, | |
| "logps/rejected": -355.0782775878906, | |
| "loss": 0.3445, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -5.623459815979004, | |
| "rewards/confidence": -0.4709520936012268, | |
| "rewards/confidence_mean_diff": 0.4709520936012268, | |
| "rewards/confidence_moving_diff": -0.011543579399585724, | |
| "rewards/margins": 2.5200109481811523, | |
| "rewards/mix_margin": 2.5200109481811523, | |
| "rewards/real_percentage": 11.800000190734863, | |
| "rewards/rejected": -8.14346981048584, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.305047737536707e-08, | |
| "logits/chosen": -0.48535990715026855, | |
| "logits/rejected": -0.4693097174167633, | |
| "logps/chosen": -301.3177795410156, | |
| "logps/rejected": -342.46942138671875, | |
| "loss": 0.3091, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -4.833828926086426, | |
| "rewards/confidence": -0.10846276581287384, | |
| "rewards/confidence_mean_diff": 0.10846276581287384, | |
| "rewards/confidence_moving_diff": 0.0017892271280288696, | |
| "rewards/margins": 2.646871328353882, | |
| "rewards/mix_margin": 2.646871328353882, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -7.480700492858887, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.848888922025552e-08, | |
| "logits/chosen": -0.4510215222835541, | |
| "logits/rejected": -0.40784555673599243, | |
| "logps/chosen": -329.4226379394531, | |
| "logps/rejected": -374.3578186035156, | |
| "loss": 0.3277, | |
| "rewards/accuracies": 0.9125000238418579, | |
| "rewards/chosen": -5.446955680847168, | |
| "rewards/confidence": 0.001511894864961505, | |
| "rewards/confidence_mean_diff": -0.001511894864961505, | |
| "rewards/confidence_moving_diff": -0.003926668781787157, | |
| "rewards/margins": 2.8011462688446045, | |
| "rewards/mix_margin": 2.8011462688446045, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -8.248102188110352, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_logits/chosen": -0.38191908597946167, | |
| "eval_logits/rejected": -0.32851406931877136, | |
| "eval_logps/chosen": -401.91107177734375, | |
| "eval_logps/rejected": -380.09014892578125, | |
| "eval_loss": 0.9255023002624512, | |
| "eval_rewards/accuracies": 0.6666666865348816, | |
| "eval_rewards/chosen": -7.030445575714111, | |
| "eval_rewards/margins": 0.9350523352622986, | |
| "eval_rewards/mix_margin": 0.9350523948669434, | |
| "eval_rewards/rejected": -7.965498447418213, | |
| "eval_runtime": 167.8148, | |
| "eval_samples_per_second": 5.464, | |
| "eval_steps_per_second": 2.735, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.4076635668540065e-08, | |
| "logits/chosen": -0.4243415892124176, | |
| "logits/rejected": -0.38142484426498413, | |
| "logps/chosen": -325.0403747558594, | |
| "logps/rejected": -364.5411071777344, | |
| "loss": 0.35, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.772444248199463, | |
| "rewards/confidence": -0.432574599981308, | |
| "rewards/confidence_mean_diff": 0.432574599981308, | |
| "rewards/confidence_moving_diff": 0.008553228341042995, | |
| "rewards/margins": 2.2061939239501953, | |
| "rewards/mix_margin": 2.2061939239501953, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -7.9786376953125, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.981715726281666e-08, | |
| "logits/chosen": -0.4008959233760834, | |
| "logits/rejected": -0.3528580069541931, | |
| "logps/chosen": -309.729736328125, | |
| "logps/rejected": -342.2464904785156, | |
| "loss": 0.3681, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.356497287750244, | |
| "rewards/confidence": -0.5101180672645569, | |
| "rewards/confidence_mean_diff": 0.5101180672645569, | |
| "rewards/confidence_moving_diff": 0.008787031285464764, | |
| "rewards/margins": 2.4204485416412354, | |
| "rewards/mix_margin": 2.4204483032226562, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -7.7769455909729, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 4.5713775416217875e-08, | |
| "logits/chosen": -0.4478469789028168, | |
| "logits/rejected": -0.37518566846847534, | |
| "logps/chosen": -323.7912292480469, | |
| "logps/rejected": -356.94683837890625, | |
| "loss": 0.2901, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.483048915863037, | |
| "rewards/confidence": -0.15089502930641174, | |
| "rewards/confidence_mean_diff": 0.15089502930641174, | |
| "rewards/confidence_moving_diff": -0.013523884117603302, | |
| "rewards/margins": 2.2260966300964355, | |
| "rewards/mix_margin": 2.2260966300964355, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -7.709145545959473, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.176968982247514e-08, | |
| "logits/chosen": -0.355146586894989, | |
| "logits/rejected": -0.27580901980400085, | |
| "logps/chosen": -305.0616760253906, | |
| "logps/rejected": -360.4013366699219, | |
| "loss": 0.3579, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -5.155121326446533, | |
| "rewards/confidence": -0.6618790626525879, | |
| "rewards/confidence_mean_diff": 0.6618790626525879, | |
| "rewards/confidence_moving_diff": 0.020888501778244972, | |
| "rewards/margins": 2.6330745220184326, | |
| "rewards/mix_margin": 2.6330742835998535, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -7.788195610046387, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.798797596089351e-08, | |
| "logits/chosen": -0.523894190788269, | |
| "logits/rejected": -0.47869396209716797, | |
| "logps/chosen": -336.5243835449219, | |
| "logps/rejected": -376.15911865234375, | |
| "loss": 0.3243, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.476739406585693, | |
| "rewards/confidence": -0.7103861570358276, | |
| "rewards/confidence_mean_diff": 0.7103861570358276, | |
| "rewards/confidence_moving_diff": -0.018846889957785606, | |
| "rewards/margins": 2.7205963134765625, | |
| "rewards/mix_margin": 2.7205967903137207, | |
| "rewards/real_percentage": 11.824999809265137, | |
| "rewards/rejected": -8.197336196899414, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.437158269818563e-08, | |
| "logits/chosen": -0.43338078260421753, | |
| "logits/rejected": -0.40491288900375366, | |
| "logps/chosen": -292.83026123046875, | |
| "logps/rejected": -329.04949951171875, | |
| "loss": 0.3273, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -4.835805416107178, | |
| "rewards/confidence": -0.07940448820590973, | |
| "rewards/confidence_mean_diff": 0.07940448820590973, | |
| "rewards/confidence_moving_diff": 0.006107149180024862, | |
| "rewards/margins": 2.4079442024230957, | |
| "rewards/mix_margin": 2.4079442024230957, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -7.243750095367432, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.092332998903416e-08, | |
| "logits/chosen": -0.48394671082496643, | |
| "logits/rejected": -0.45265993475914, | |
| "logps/chosen": -303.0623779296875, | |
| "logps/rejected": -352.133544921875, | |
| "loss": 0.3628, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -5.054957866668701, | |
| "rewards/confidence": -0.43090638518333435, | |
| "rewards/confidence_mean_diff": 0.43090638518333435, | |
| "rewards/confidence_moving_diff": 0.00020202621817588806, | |
| "rewards/margins": 2.8925867080688477, | |
| "rewards/mix_margin": 2.892587184906006, | |
| "rewards/real_percentage": 12.125, | |
| "rewards/rejected": -7.947544097900391, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 2.764590667717562e-08, | |
| "logits/chosen": -0.463339239358902, | |
| "logits/rejected": -0.42284661531448364, | |
| "logps/chosen": -305.8697814941406, | |
| "logps/rejected": -351.04010009765625, | |
| "loss": 0.3358, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.207819938659668, | |
| "rewards/confidence": -0.503353476524353, | |
| "rewards/confidence_mean_diff": 0.503353476524353, | |
| "rewards/confidence_moving_diff": -0.0014690018724650145, | |
| "rewards/margins": 2.403524398803711, | |
| "rewards/mix_margin": 2.403524160385132, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -7.611344337463379, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.4541868398721576e-08, | |
| "logits/chosen": -0.45686477422714233, | |
| "logits/rejected": -0.38911059498786926, | |
| "logps/chosen": -314.32763671875, | |
| "logps/rejected": -367.2061462402344, | |
| "loss": 0.2716, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.678936958312988, | |
| "rewards/confidence": -0.2548510432243347, | |
| "rewards/confidence_mean_diff": 0.2548510432243347, | |
| "rewards/confidence_moving_diff": -0.011963443830609322, | |
| "rewards/margins": 2.9311575889587402, | |
| "rewards/mix_margin": 2.9311578273773193, | |
| "rewards/real_percentage": 11.899999618530273, | |
| "rewards/rejected": -8.610095024108887, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 2.1613635589349756e-08, | |
| "logits/chosen": -0.426119327545166, | |
| "logits/rejected": -0.40804940462112427, | |
| "logps/chosen": -317.08135986328125, | |
| "logps/rejected": -366.5263977050781, | |
| "loss": 0.322, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -6.069199562072754, | |
| "rewards/confidence": 0.03942107781767845, | |
| "rewards/confidence_mean_diff": -0.03942107781767845, | |
| "rewards/confidence_moving_diff": 0.009027032181620598, | |
| "rewards/margins": 2.5976197719573975, | |
| "rewards/mix_margin": 2.5976197719573975, | |
| "rewards/real_percentage": 12.149999618530273, | |
| "rewards/rejected": -8.666818618774414, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.8863491596921743e-08, | |
| "logits/chosen": -0.49721455574035645, | |
| "logits/rejected": -0.4590892791748047, | |
| "logps/chosen": -308.40606689453125, | |
| "logps/rejected": -347.5412292480469, | |
| "loss": 0.2732, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.372550010681152, | |
| "rewards/confidence": -0.15715138614177704, | |
| "rewards/confidence_mean_diff": 0.15715138614177704, | |
| "rewards/confidence_moving_diff": -0.004644413013011217, | |
| "rewards/margins": 2.7416749000549316, | |
| "rewards/mix_margin": 2.7416751384735107, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -8.114225387573242, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.629358090099639e-08, | |
| "logits/chosen": -0.405491441488266, | |
| "logits/rejected": -0.34963005781173706, | |
| "logps/chosen": -313.4161071777344, | |
| "logps/rejected": -358.3491516113281, | |
| "loss": 0.358, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -5.984471321105957, | |
| "rewards/confidence": -0.07869835197925568, | |
| "rewards/confidence_mean_diff": 0.07869835197925568, | |
| "rewards/confidence_moving_diff": 0.005973615683615208, | |
| "rewards/margins": 2.347227096557617, | |
| "rewards/mix_margin": 2.3472275733947754, | |
| "rewards/real_percentage": 12.199999809265137, | |
| "rewards/rejected": -8.331698417663574, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.390590744062975e-08, | |
| "logits/chosen": -0.5188131332397461, | |
| "logits/rejected": -0.5038987398147583, | |
| "logps/chosen": -309.9880676269531, | |
| "logps/rejected": -366.2044677734375, | |
| "loss": 0.2672, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.696852684020996, | |
| "rewards/confidence": -0.4389854073524475, | |
| "rewards/confidence_mean_diff": 0.4389854073524475, | |
| "rewards/confidence_moving_diff": -0.006844349205493927, | |
| "rewards/margins": 2.768991470336914, | |
| "rewards/mix_margin": 2.768991470336914, | |
| "rewards/real_percentage": 11.875, | |
| "rewards/rejected": -8.465843200683594, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.1702333051763268e-08, | |
| "logits/chosen": -0.4130728244781494, | |
| "logits/rejected": -0.3766258955001831, | |
| "logps/chosen": -326.6462707519531, | |
| "logps/rejected": -371.1008605957031, | |
| "loss": 0.3447, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.4158220291137695, | |
| "rewards/confidence": -0.11326966434717178, | |
| "rewards/confidence_mean_diff": 0.11326966434717178, | |
| "rewards/confidence_moving_diff": 0.007706183008849621, | |
| "rewards/margins": 2.9776198863983154, | |
| "rewards/mix_margin": 2.9776198863983154, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -8.393442153930664, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 9.684576015420275e-09, | |
| "logits/chosen": -0.525262176990509, | |
| "logits/rejected": -0.44665464758872986, | |
| "logps/chosen": -320.39227294921875, | |
| "logps/rejected": -344.9775085449219, | |
| "loss": 0.3635, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.540863513946533, | |
| "rewards/confidence": -0.26441746950149536, | |
| "rewards/confidence_mean_diff": 0.26441746950149536, | |
| "rewards/confidence_moving_diff": 0.005520409904420376, | |
| "rewards/margins": 2.0581774711608887, | |
| "rewards/mix_margin": 2.0581774711608887, | |
| "rewards/real_percentage": 12.100000381469727, | |
| "rewards/rejected": -7.599040985107422, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 7.85420971784223e-09, | |
| "logits/chosen": -0.32916390895843506, | |
| "logits/rejected": -0.30752667784690857, | |
| "logps/chosen": -302.265869140625, | |
| "logps/rejected": -344.9718017578125, | |
| "loss": 0.3833, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -5.624446868896484, | |
| "rewards/confidence": -0.47195807099342346, | |
| "rewards/confidence_mean_diff": 0.47195807099342346, | |
| "rewards/confidence_moving_diff": -0.007561626844108105, | |
| "rewards/margins": 2.270535707473755, | |
| "rewards/mix_margin": 2.270535707473755, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -7.894982814788818, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.2126614236091834e-09, | |
| "logits/chosen": -0.48796114325523376, | |
| "logits/rejected": -0.45743227005004883, | |
| "logps/chosen": -314.5947265625, | |
| "logps/rejected": -350.1982116699219, | |
| "loss": 0.3892, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -5.277219772338867, | |
| "rewards/confidence": -0.6118943095207214, | |
| "rewards/confidence_mean_diff": 0.6118943095207214, | |
| "rewards/confidence_moving_diff": 0.001549577689729631, | |
| "rewards/margins": 2.173887014389038, | |
| "rewards/mix_margin": 2.173886775970459, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -7.451106071472168, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 4.761211162702117e-09, | |
| "logits/chosen": -0.5557531118392944, | |
| "logits/rejected": -0.5374355316162109, | |
| "logps/chosen": -349.25860595703125, | |
| "logps/rejected": -390.11981201171875, | |
| "loss": 0.3355, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -6.486425876617432, | |
| "rewards/confidence": -0.4561425745487213, | |
| "rewards/confidence_mean_diff": 0.4561425745487213, | |
| "rewards/confidence_moving_diff": -0.0020445152185857296, | |
| "rewards/margins": 2.195816993713379, | |
| "rewards/mix_margin": 2.1958167552948, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -8.682242393493652, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 3.5009907323737818e-09, | |
| "logits/chosen": -0.5298832654953003, | |
| "logits/rejected": -0.48022761940956116, | |
| "logps/chosen": -326.51580810546875, | |
| "logps/rejected": -366.4398193359375, | |
| "loss": 0.3048, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -6.078348159790039, | |
| "rewards/confidence": -0.25415006279945374, | |
| "rewards/confidence_mean_diff": 0.25415006279945374, | |
| "rewards/confidence_moving_diff": -0.0033754161559045315, | |
| "rewards/margins": 2.427124500274658, | |
| "rewards/mix_margin": 2.4271240234375, | |
| "rewards/real_percentage": 11.975000381469727, | |
| "rewards/rejected": -8.505472183227539, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.4329828146074096e-09, | |
| "logits/chosen": -0.4156433939933777, | |
| "logits/rejected": -0.3991900086402893, | |
| "logps/chosen": -312.8721618652344, | |
| "logps/rejected": -370.5389709472656, | |
| "loss": 0.3594, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -5.89020299911499, | |
| "rewards/confidence": -0.2801781892776489, | |
| "rewards/confidence_mean_diff": 0.2801781892776489, | |
| "rewards/confidence_moving_diff": 0.0025722025893628597, | |
| "rewards/margins": 2.428835868835449, | |
| "rewards/mix_margin": 2.4288361072540283, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -8.319039344787598, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.5580202098509076e-09, | |
| "logits/chosen": -0.4434266984462738, | |
| "logits/rejected": -0.45592737197875977, | |
| "logps/chosen": -290.8067626953125, | |
| "logps/rejected": -341.25067138671875, | |
| "loss": 0.3425, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.5403594970703125, | |
| "rewards/confidence": -0.5445820093154907, | |
| "rewards/confidence_mean_diff": 0.5445820093154907, | |
| "rewards/confidence_moving_diff": 0.0008656553691253066, | |
| "rewards/margins": 2.351865291595459, | |
| "rewards/mix_margin": 2.351865291595459, | |
| "rewards/real_percentage": 12.024999618530273, | |
| "rewards/rejected": -7.8922247886657715, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 8.767851876239074e-10, | |
| "logits/chosen": -0.5394158959388733, | |
| "logits/rejected": -0.5107717514038086, | |
| "logps/chosen": -320.61065673828125, | |
| "logps/rejected": -353.67718505859375, | |
| "loss": 0.3947, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -5.631471157073975, | |
| "rewards/confidence": -0.4513428211212158, | |
| "rewards/confidence_mean_diff": 0.4513428211212158, | |
| "rewards/confidence_moving_diff": -0.0011861994862556458, | |
| "rewards/margins": 2.674133777618408, | |
| "rewards/mix_margin": 2.674133777618408, | |
| "rewards/real_percentage": 12.0, | |
| "rewards/rejected": -8.305604934692383, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.898089545047445e-10, | |
| "logits/chosen": -0.5971912145614624, | |
| "logits/rejected": -0.5452942848205566, | |
| "logps/chosen": -315.72601318359375, | |
| "logps/rejected": -360.5458679199219, | |
| "loss": 0.3016, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -6.048798561096191, | |
| "rewards/confidence": -0.07524915784597397, | |
| "rewards/confidence_mean_diff": 0.07524915784597397, | |
| "rewards/confidence_moving_diff": -0.00730225071310997, | |
| "rewards/margins": 2.876786708831787, | |
| "rewards/mix_margin": 2.876786708831787, | |
| "rewards/real_percentage": 11.925000190734863, | |
| "rewards/rejected": -8.92558479309082, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 9.747123991141193e-11, | |
| "logits/chosen": -0.5507758855819702, | |
| "logits/rejected": -0.49374765157699585, | |
| "logps/chosen": -310.2986145019531, | |
| "logps/rejected": -357.17633056640625, | |
| "loss": 0.3239, | |
| "rewards/accuracies": 0.887499988079071, | |
| "rewards/chosen": -5.499569892883301, | |
| "rewards/confidence": 0.10529796779155731, | |
| "rewards/confidence_mean_diff": -0.10529796779155731, | |
| "rewards/confidence_moving_diff": -0.0012182623613625765, | |
| "rewards/margins": 2.964461326599121, | |
| "rewards/mix_margin": 2.964461088180542, | |
| "rewards/real_percentage": 11.949999809265137, | |
| "rewards/rejected": -8.464030265808105, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -0.37802356481552124, | |
| "logits/rejected": -0.3598063886165619, | |
| "logps/chosen": -301.7012634277344, | |
| "logps/rejected": -346.14788818359375, | |
| "loss": 0.3353, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.516350746154785, | |
| "rewards/confidence": -0.47447115182876587, | |
| "rewards/confidence_mean_diff": 0.47447115182876587, | |
| "rewards/confidence_moving_diff": 0.005603847559541464, | |
| "rewards/margins": 2.416151523590088, | |
| "rewards/mix_margin": 2.416151762008667, | |
| "rewards/real_percentage": 12.074999809265137, | |
| "rewards/rejected": -7.932501792907715, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 1250, | |
| "total_flos": 0.0, | |
| "train_loss": 0.4112237459659576, | |
| "train_runtime": 8536.0121, | |
| "train_samples_per_second": 2.343, | |
| "train_steps_per_second": 0.146 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 200, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |