{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.578059071729957,
  "eval_steps": 500,
  "global_step": 580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.33755274261603374,
      "grad_norm": 4.021343231201172,
      "learning_rate": 8.620689655172415e-07,
      "logits/chosen": -2.3571667671203613,
      "logits/rejected": -2.3413684368133545,
      "logps/chosen": -66.12004852294922,
      "logps/rejected": -75.67423248291016,
      "loss": 0.6916,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.002384235616773367,
      "rewards/margins": 0.0006812589708715677,
      "rewards/rejected": 0.00170297606382519,
      "step": 10
    },
    {
      "epoch": 0.6751054852320675,
      "grad_norm": 4.169963359832764,
      "learning_rate": 1.724137931034483e-06,
      "logits/chosen": -2.2970731258392334,
      "logits/rejected": -2.3126332759857178,
      "logps/chosen": -75.44676208496094,
      "logps/rejected": -91.96070861816406,
      "loss": 0.695,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.004610966891050339,
      "rewards/margins": -0.0035776779986917973,
      "rewards/rejected": -0.0010332881938666105,
      "step": 20
    },
    {
      "epoch": 1.0126582278481013,
      "grad_norm": 3.956296682357788,
      "learning_rate": 2.5862068965517246e-06,
      "logits/chosen": -2.323883056640625,
      "logits/rejected": -2.360252857208252,
      "logps/chosen": -82.34379577636719,
      "logps/rejected": -76.72650146484375,
      "loss": 0.6932,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.0038711726665496826,
      "rewards/margins": -0.002081885002553463,
      "rewards/rejected": -0.001789287431165576,
      "step": 30
    },
    {
      "epoch": 1.350210970464135,
      "grad_norm": 3.930093765258789,
      "learning_rate": 3.448275862068966e-06,
      "logits/chosen": -2.3154587745666504,
      "logits/rejected": -2.3474326133728027,
      "logps/chosen": -65.43012237548828,
      "logps/rejected": -73.15928649902344,
      "loss": 0.691,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.005101156421005726,
      "rewards/margins": 0.008008326403796673,
      "rewards/rejected": -0.0029071702156215906,
      "step": 40
    },
    {
      "epoch": 1.6877637130801688,
      "grad_norm": 4.285252094268799,
      "learning_rate": 4.310344827586207e-06,
      "logits/chosen": -2.3710691928863525,
      "logits/rejected": -2.371490478515625,
      "logps/chosen": -71.6757583618164,
      "logps/rejected": -70.73356628417969,
      "loss": 0.6889,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.0005036738002672791,
      "rewards/margins": 0.01000114344060421,
      "rewards/rejected": -0.009497471153736115,
      "step": 50
    },
    {
      "epoch": 2.0253164556962027,
      "grad_norm": 4.272525787353516,
      "learning_rate": 4.999818897894192e-06,
      "logits/chosen": -2.3313004970550537,
      "logits/rejected": -2.3659963607788086,
      "logps/chosen": -76.61670684814453,
      "logps/rejected": -83.35383605957031,
      "loss": 0.6826,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.0012120052706450224,
      "rewards/margins": 0.0215681791305542,
      "rewards/rejected": -0.020356174558401108,
      "step": 60
    },
    {
      "epoch": 2.3628691983122363,
      "grad_norm": 3.9699583053588867,
      "learning_rate": 4.9934830787948756e-06,
      "logits/chosen": -2.335836172103882,
      "logits/rejected": -2.346723794937134,
      "logps/chosen": -77.20027923583984,
      "logps/rejected": -76.7348403930664,
      "loss": 0.668,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.01467165071517229,
      "rewards/margins": 0.05109705403447151,
      "rewards/rejected": -0.036425404250621796,
      "step": 70
    },
    {
      "epoch": 2.70042194092827,
      "grad_norm": 4.201504230499268,
      "learning_rate": 4.978118375700895e-06,
      "logits/chosen": -2.3483119010925293,
      "logits/rejected": -2.314948320388794,
      "logps/chosen": -86.64659881591797,
      "logps/rejected": -73.74983215332031,
      "loss": 0.6543,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.015483448281884193,
      "rewards/margins": 0.07252755016088486,
      "rewards/rejected": -0.057044100016355515,
      "step": 80
    },
    {
      "epoch": 3.037974683544304,
      "grad_norm": 4.158086776733398,
      "learning_rate": 4.953780424089803e-06,
      "logits/chosen": -2.353440523147583,
      "logits/rejected": -2.3548269271850586,
      "logps/chosen": -66.6883773803711,
      "logps/rejected": -70.85858917236328,
      "loss": 0.6434,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.027489716187119484,
      "rewards/margins": 0.11177561432123184,
      "rewards/rejected": -0.08428589999675751,
      "step": 90
    },
    {
      "epoch": 3.3755274261603376,
      "grad_norm": 4.082221031188965,
      "learning_rate": 4.920557351506409e-06,
      "logits/chosen": -2.341866970062256,
      "logits/rejected": -2.364499092102051,
      "logps/chosen": -129.14761352539062,
      "logps/rejected": -73.15742492675781,
      "loss": 0.6058,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.4784974157810211,
      "rewards/margins": 0.6413174867630005,
      "rewards/rejected": -0.16281995177268982,
      "step": 100
    },
    {
      "epoch": 3.7130801687763713,
      "grad_norm": 4.832098007202148,
      "learning_rate": 4.878569458453592e-06,
      "logits/chosen": -2.345391035079956,
      "logits/rejected": -2.348513603210449,
      "logps/chosen": -77.12025451660156,
      "logps/rejected": -87.58562469482422,
      "loss": 0.583,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.005122403614223003,
      "rewards/margins": 0.2564489245414734,
      "rewards/rejected": -0.26157131791114807,
      "step": 110
    },
    {
      "epoch": 4.050632911392405,
      "grad_norm": 4.12258243560791,
      "learning_rate": 4.827968782785062e-06,
      "logits/chosen": -2.373936176300049,
      "logits/rejected": -2.4137730598449707,
      "logps/chosen": -75.44515228271484,
      "logps/rejected": -93.63223266601562,
      "loss": 0.5739,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.009799259714782238,
      "rewards/margins": 0.2614334523677826,
      "rewards/rejected": -0.2712326943874359,
      "step": 120
    },
    {
      "epoch": 4.3881856540084385,
      "grad_norm": 4.568253993988037,
      "learning_rate": 4.7689385491773934e-06,
      "logits/chosen": -2.3857738971710205,
      "logits/rejected": -2.3939366340637207,
      "logps/chosen": -75.08316802978516,
      "logps/rejected": -85.54856872558594,
      "loss": 0.5264,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.057138361036777496,
      "rewards/margins": 0.4834250807762146,
      "rewards/rejected": -0.5405634641647339,
      "step": 130
    },
    {
      "epoch": 4.725738396624473,
      "grad_norm": 5.059521675109863,
      "learning_rate": 4.70169250567482e-06,
      "logits/chosen": -2.403332471847534,
      "logits/rejected": -2.4016706943511963,
      "logps/chosen": -77.38340759277344,
      "logps/rejected": -77.5801010131836,
      "loss": 0.5097,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.15228016674518585,
      "rewards/margins": 0.39201101660728455,
      "rewards/rejected": -0.5442911982536316,
      "step": 140
    },
    {
      "epoch": 5.063291139240507,
      "grad_norm": 4.922734260559082,
      "learning_rate": 4.626474149709127e-06,
      "logits/chosen": -2.400696039199829,
      "logits/rejected": -2.3924992084503174,
      "logps/chosen": -65.39146423339844,
      "logps/rejected": -72.46720886230469,
      "loss": 0.5004,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.16622374951839447,
      "rewards/margins": 0.5306353569030762,
      "rewards/rejected": -0.6968590617179871,
      "step": 150
    },
    {
      "epoch": 5.40084388185654,
      "grad_norm": 4.739672660827637,
      "learning_rate": 4.54355584639723e-06,
      "logits/chosen": -2.426323413848877,
      "logits/rejected": -2.4180703163146973,
      "logps/chosen": -89.33406066894531,
      "logps/rejected": -86.6529541015625,
      "loss": 0.4472,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -0.2703530192375183,
      "rewards/margins": 0.723961353302002,
      "rewards/rejected": -0.9943143725395203,
      "step": 160
    },
    {
      "epoch": 5.738396624472574,
      "grad_norm": 5.088452339172363,
      "learning_rate": 4.45323784230908e-06,
      "logits/chosen": -2.371295690536499,
      "logits/rejected": -2.4007980823516846,
      "logps/chosen": -74.7086410522461,
      "logps/rejected": -110.96791076660156,
      "loss": 0.4337,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.19489821791648865,
      "rewards/margins": 0.838965117931366,
      "rewards/rejected": -1.0338633060455322,
      "step": 170
    },
    {
      "epoch": 6.075949367088608,
      "grad_norm": 4.700001239776611,
      "learning_rate": 4.355847178277025e-06,
      "logits/chosen": -2.408874750137329,
      "logits/rejected": -2.433443546295166,
      "logps/chosen": -73.986083984375,
      "logps/rejected": -92.54002380371094,
      "loss": 0.4201,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.3620097041130066,
      "rewards/margins": 0.8090314865112305,
      "rewards/rejected": -1.1710412502288818,
      "step": 180
    },
    {
      "epoch": 6.413502109704641,
      "grad_norm": 5.745143890380859,
      "learning_rate": 4.2517365051833564e-06,
      "logits/chosen": -2.413661479949951,
      "logits/rejected": -2.4319045543670654,
      "logps/chosen": -61.93586349487305,
      "logps/rejected": -87.93816375732422,
      "loss": 0.3822,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.3493840992450714,
      "rewards/margins": 1.0286495685577393,
      "rewards/rejected": -1.3780337572097778,
      "step": 190
    },
    {
      "epoch": 6.751054852320675,
      "grad_norm": 14.167346000671387,
      "learning_rate": 4.141282807014034e-06,
      "logits/chosen": -2.376776695251465,
      "logits/rejected": -2.386939287185669,
      "logps/chosen": -78.03498840332031,
      "logps/rejected": -103.80584716796875,
      "loss": 0.3533,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.5522706508636475,
      "rewards/margins": 1.1880146265029907,
      "rewards/rejected": -1.7402851581573486,
      "step": 200
    },
    {
      "epoch": 7.0886075949367084,
      "grad_norm": 5.238346576690674,
      "learning_rate": 4.024886035802432e-06,
      "logits/chosen": -2.399014711380005,
      "logits/rejected": -2.4000418186187744,
      "logps/chosen": -91.64801025390625,
      "logps/rejected": -88.77143096923828,
      "loss": 0.3512,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.6410180330276489,
      "rewards/margins": 1.2458090782165527,
      "rewards/rejected": -1.8868271112442017,
      "step": 210
    },
    {
      "epoch": 7.4261603375527425,
      "grad_norm": 4.895415306091309,
      "learning_rate": 3.9029676634059565e-06,
      "logits/chosen": -2.38824725151062,
      "logits/rejected": -2.390937328338623,
      "logps/chosen": -76.24591064453125,
      "logps/rejected": -109.47352600097656,
      "loss": 0.3182,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.6207951307296753,
      "rewards/margins": 1.5345582962036133,
      "rewards/rejected": -2.155353546142578,
      "step": 220
    },
    {
      "epoch": 7.763713080168777,
      "grad_norm": 7.413693904876709,
      "learning_rate": 3.7759691553595214e-06,
      "logits/chosen": -2.399376153945923,
      "logits/rejected": -2.3986117839813232,
      "logps/chosen": -90.97004699707031,
      "logps/rejected": -98.67308807373047,
      "loss": 0.2989,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.7282214760780334,
      "rewards/margins": 1.5992071628570557,
      "rewards/rejected": -2.3274283409118652,
      "step": 230
    },
    {
      "epoch": 8.10126582278481,
      "grad_norm": 4.8047990798950195,
      "learning_rate": 3.6443503723320837e-06,
      "logits/chosen": -2.3746752738952637,
      "logits/rejected": -2.3904380798339844,
      "logps/chosen": -81.03557586669922,
      "logps/rejected": -98.62934112548828,
      "loss": 0.2891,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -1.0007418394088745,
      "rewards/margins": 1.6062211990356445,
      "rewards/rejected": -2.6069629192352295,
      "step": 240
    },
    {
      "epoch": 8.438818565400844,
      "grad_norm": 5.075074672698975,
      "learning_rate": 3.508587904974522e-06,
      "logits/chosen": -2.3420817852020264,
      "logits/rejected": -2.3538601398468018,
      "logps/chosen": -86.0930404663086,
      "logps/rejected": -113.33358001708984,
      "loss": 0.2451,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -0.8170707821846008,
      "rewards/margins": 1.765887975692749,
      "rewards/rejected": -2.582958698272705,
      "step": 250
    },
    {
      "epoch": 8.776371308016877,
      "grad_norm": 5.683614730834961,
      "learning_rate": 3.3691733481883693e-06,
      "logits/chosen": -2.3793487548828125,
      "logits/rejected": -2.376232862472534,
      "logps/chosen": -82.44889831542969,
      "logps/rejected": -100.43474578857422,
      "loss": 0.2561,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -1.052356481552124,
      "rewards/margins": 1.7894868850708008,
      "rewards/rejected": -2.841843605041504,
      "step": 260
    },
    {
      "epoch": 9.113924050632912,
      "grad_norm": 5.043191909790039,
      "learning_rate": 3.226611521064278e-06,
      "logits/chosen": -2.3715972900390625,
      "logits/rejected": -2.3901712894439697,
      "logps/chosen": -78.08219909667969,
      "logps/rejected": -101.30404663085938,
      "loss": 0.2135,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -1.124599575996399,
      "rewards/margins": 1.955810546875,
      "rewards/rejected": -3.0804100036621094,
      "step": 270
    },
    {
      "epoch": 9.451476793248945,
      "grad_norm": 4.911313533782959,
      "learning_rate": 3.0814186389357765e-06,
      "logits/chosen": -2.365330696105957,
      "logits/rejected": -2.3815040588378906,
      "logps/chosen": -74.75053405761719,
      "logps/rejected": -96.59932708740234,
      "loss": 0.2016,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.2124760150909424,
      "rewards/margins": 2.1574602127075195,
      "rewards/rejected": -3.369936466217041,
      "step": 280
    },
    {
      "epoch": 9.789029535864978,
      "grad_norm": 5.809771537780762,
      "learning_rate": 2.9341204441673267e-06,
      "logits/chosen": -2.349513292312622,
      "logits/rejected": -2.362490653991699,
      "logps/chosen": -82.00188446044922,
      "logps/rejected": -115.06053161621094,
      "loss": 0.1873,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -1.5614944696426392,
      "rewards/margins": 2.464742660522461,
      "rewards/rejected": -4.0262370109558105,
      "step": 290
    },
    {
      "epoch": 10.126582278481013,
      "grad_norm": 5.956116676330566,
      "learning_rate": 2.785250302445062e-06,
      "logits/chosen": -2.3490607738494873,
      "logits/rejected": -2.3738341331481934,
      "logps/chosen": -96.7169418334961,
      "logps/rejected": -125.81837463378906,
      "loss": 0.1794,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.4949434995651245,
      "rewards/margins": 2.48805832862854,
      "rewards/rejected": -3.983001708984375,
      "step": 300
    },
    {
      "epoch": 10.464135021097047,
      "grad_norm": 5.315141201019287,
      "learning_rate": 2.6353472714635443e-06,
      "logits/chosen": -2.3448591232299805,
      "logits/rejected": -2.3330416679382324,
      "logps/chosen": -77.31942749023438,
      "logps/rejected": -108.85356140136719,
      "loss": 0.1444,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.4829410314559937,
      "rewards/margins": 2.6632163524627686,
      "rewards/rejected": -4.146157264709473,
      "step": 310
    },
    {
      "epoch": 10.80168776371308,
      "grad_norm": 5.832997798919678,
      "learning_rate": 2.4849541490017868e-06,
      "logits/chosen": -2.3063769340515137,
      "logits/rejected": -2.31026029586792,
      "logps/chosen": -89.78933715820312,
      "logps/rejected": -124.50910949707031,
      "loss": 0.1507,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.8659601211547852,
      "rewards/margins": 2.7945058345794678,
      "rewards/rejected": -4.660466194152832,
      "step": 320
    },
    {
      "epoch": 11.139240506329115,
      "grad_norm": 5.006984233856201,
      "learning_rate": 2.3346155074564712e-06,
      "logits/chosen": -2.3069465160369873,
      "logits/rejected": -2.3103697299957275,
      "logps/chosen": -94.30183410644531,
      "logps/rejected": -111.3757553100586,
      "loss": 0.1389,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.8566234111785889,
      "rewards/margins": 2.650669574737549,
      "rewards/rejected": -4.507293701171875,
      "step": 330
    },
    {
      "epoch": 11.476793248945148,
      "grad_norm": 5.461178302764893,
      "learning_rate": 2.184875721949277e-06,
      "logits/chosen": -2.307982921600342,
      "logits/rejected": -2.2805938720703125,
      "logps/chosen": -95.00460815429688,
      "logps/rejected": -130.14364624023438,
      "loss": 0.1192,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.095489025115967,
      "rewards/margins": 3.2359142303466797,
      "rewards/rejected": -5.331402778625488,
      "step": 340
    },
    {
      "epoch": 11.814345991561181,
      "grad_norm": 5.430806636810303,
      "learning_rate": 2.0362769991485514e-06,
      "logits/chosen": -2.305732488632202,
      "logits/rejected": -2.325230360031128,
      "logps/chosen": -85.6950454711914,
      "logps/rejected": -129.57199096679688,
      "loss": 0.1168,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.912523865699768,
      "rewards/margins": 3.0515921115875244,
      "rewards/rejected": -4.964116096496582,
      "step": 350
    },
    {
      "epoch": 12.151898734177216,
      "grad_norm": 4.317180156707764,
      "learning_rate": 1.8893574139429226e-06,
      "logits/chosen": -2.29160737991333,
      "logits/rejected": -2.3020012378692627,
      "logps/chosen": -88.52080535888672,
      "logps/rejected": -132.19122314453125,
      "loss": 0.0974,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.399042844772339,
      "rewards/margins": 3.41259503364563,
      "rewards/rejected": -5.811637878417969,
      "step": 360
    },
    {
      "epoch": 12.48945147679325,
      "grad_norm": 4.4959611892700195,
      "learning_rate": 1.744648961076068e-06,
      "logits/chosen": -2.2502753734588623,
      "logits/rejected": -2.26035737991333,
      "logps/chosen": -100.68859100341797,
      "logps/rejected": -137.6667938232422,
      "loss": 0.084,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.559737205505371,
      "rewards/margins": 3.8113670349121094,
      "rewards/rejected": -6.3711042404174805,
      "step": 370
    },
    {
      "epoch": 12.827004219409282,
      "grad_norm": 5.346553802490234,
      "learning_rate": 1.602675628797636e-06,
      "logits/chosen": -2.2998809814453125,
      "logits/rejected": -2.2750256061553955,
      "logps/chosen": -100.53385925292969,
      "logps/rejected": -129.20062255859375,
      "loss": 0.0921,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.7617571353912354,
      "rewards/margins": 3.2644877433776855,
      "rewards/rejected": -6.0262451171875,
      "step": 380
    },
    {
      "epoch": 13.164556962025316,
      "grad_norm": 4.991717338562012,
      "learning_rate": 1.4639515015056205e-06,
      "logits/chosen": -2.249573230743408,
      "logits/rejected": -2.265824556350708,
      "logps/chosen": -104.0100326538086,
      "logps/rejected": -141.9222869873047,
      "loss": 0.09,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.6467862129211426,
      "rewards/margins": 3.3599770069122314,
      "rewards/rejected": -6.006763458251953,
      "step": 390
    },
    {
      "epoch": 13.50210970464135,
      "grad_norm": 5.525568962097168,
      "learning_rate": 1.328978898250525e-06,
      "logits/chosen": -2.304884672164917,
      "logits/rejected": -2.2955222129821777,
      "logps/chosen": -99.25491333007812,
      "logps/rejected": -142.86819458007812,
      "loss": 0.0758,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.675879955291748,
      "rewards/margins": 3.701845645904541,
      "rewards/rejected": -6.377726078033447,
      "step": 400
    },
    {
      "epoch": 13.839662447257384,
      "grad_norm": 4.815173149108887,
      "learning_rate": 1.198246553841744e-06,
      "logits/chosen": -2.295896053314209,
      "logits/rejected": -2.3182671070098877,
      "logps/chosen": -90.5086669921875,
      "logps/rejected": -123.36918640136719,
      "loss": 0.0631,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -2.5161099433898926,
      "rewards/margins": 3.6973776817321777,
      "rewards/rejected": -6.21348762512207,
      "step": 410
    },
    {
      "epoch": 14.177215189873417,
      "grad_norm": 3.638730525970459,
      "learning_rate": 1.0722278491423998e-06,
      "logits/chosen": -2.2437102794647217,
      "logits/rejected": -2.259737014770508,
      "logps/chosen": -100.89008331298828,
      "logps/rejected": -145.32876586914062,
      "loss": 0.0618,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -3.0369720458984375,
      "rewards/margins": 4.202728271484375,
      "rewards/rejected": -7.2397003173828125,
      "step": 420
    },
    {
      "epoch": 14.514767932489452,
      "grad_norm": 3.4701929092407227,
      "learning_rate": 9.513790969606926e-07,
      "logits/chosen": -2.235750913619995,
      "logits/rejected": -2.2566628456115723,
      "logps/chosen": -113.2632827758789,
      "logps/rejected": -151.08389282226562,
      "loss": 0.0575,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.1561920642852783,
      "rewards/margins": 3.824761152267456,
      "rewards/rejected": -6.980954170227051,
      "step": 430
    },
    {
      "epoch": 14.852320675105485,
      "grad_norm": 3.5095152854919434,
      "learning_rate": 8.361378897445643e-07,
      "logits/chosen": -2.22627592086792,
      "logits/rejected": -2.257497787475586,
      "logps/chosen": -114.46488952636719,
      "logps/rejected": -151.49237060546875,
      "loss": 0.0614,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.645853042602539,
      "rewards/margins": 4.079274654388428,
      "rewards/rejected": -7.725128173828125,
      "step": 440
    },
    {
      "epoch": 15.189873417721518,
      "grad_norm": 4.185005187988281,
      "learning_rate": 7.269215150626391e-07,
      "logits/chosen": -2.2187466621398926,
      "logits/rejected": -2.2320122718811035,
      "logps/chosen": -113.44010925292969,
      "logps/rejected": -154.74827575683594,
      "loss": 0.0552,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.342592239379883,
      "rewards/margins": 4.421627998352051,
      "rewards/rejected": -7.764220237731934,
      "step": 450
    },
    {
      "epoch": 15.527426160337553,
      "grad_norm": 3.698930263519287,
      "learning_rate": 6.241254446089942e-07,
      "logits/chosen": -2.194887638092041,
      "logits/rejected": -2.2225141525268555,
      "logps/chosen": -106.3698501586914,
      "logps/rejected": -159.8157501220703,
      "loss": 0.0494,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.6747384071350098,
      "rewards/margins": 4.638253211975098,
      "rewards/rejected": -8.31299114227295,
      "step": 460
    },
    {
      "epoch": 15.864978902953586,
      "grad_norm": 4.312787055969238,
      "learning_rate": 5.281219022030423e-07,
      "logits/chosen": -2.215421438217163,
      "logits/rejected": -2.2375741004943848,
      "logps/chosen": -98.69608306884766,
      "logps/rejected": -165.375,
      "loss": 0.0477,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.6675498485565186,
      "rewards/margins": 4.56928825378418,
      "rewards/rejected": -8.236838340759277,
      "step": 470
    },
    {
      "epoch": 16.20253164556962,
      "grad_norm": 3.4455654621124268,
      "learning_rate": 4.392585159698087e-07,
      "logits/chosen": -2.20900297164917,
      "logits/rejected": -2.214944362640381,
      "logps/chosen": -92.8326416015625,
      "logps/rejected": -133.35049438476562,
      "loss": 0.0518,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.2708263397216797,
      "rewards/margins": 4.095736980438232,
      "rewards/rejected": -7.366563320159912,
      "step": 480
    },
    {
      "epoch": 16.540084388185655,
      "grad_norm": 4.105606555938721,
      "learning_rate": 3.578570595810274e-07,
      "logits/chosen": -2.1989166736602783,
      "logits/rejected": -2.2026238441467285,
      "logps/chosen": -124.64225006103516,
      "logps/rejected": -160.11962890625,
      "loss": 0.0486,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.9146201610565186,
      "rewards/margins": 4.5453619956970215,
      "rewards/rejected": -8.459983825683594,
      "step": 490
    },
    {
      "epoch": 16.877637130801688,
      "grad_norm": 3.0307629108428955,
      "learning_rate": 2.8421228711503127e-07,
      "logits/chosen": -2.2583324909210205,
      "logits/rejected": -2.264362335205078,
      "logps/chosen": -109.14697265625,
      "logps/rejected": -153.9482421875,
      "loss": 0.0416,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.014981269836426,
      "rewards/margins": 4.060934066772461,
      "rewards/rejected": -8.075915336608887,
      "step": 500
    },
    {
      "epoch": 17.21518987341772,
      "grad_norm": 7.35497522354126,
      "learning_rate": 2.1859086575439225e-07,
      "logits/chosen": -2.2129454612731934,
      "logits/rejected": -2.2381744384765625,
      "logps/chosen": -105.6734848022461,
      "logps/rejected": -155.99111938476562,
      "loss": 0.0448,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.604158878326416,
      "rewards/margins": 4.801852703094482,
      "rewards/rejected": -8.406011581420898,
      "step": 510
    },
    {
      "epoch": 17.552742616033754,
      "grad_norm": 3.5699377059936523,
      "learning_rate": 1.6123041018599766e-07,
      "logits/chosen": -2.195000410079956,
      "logits/rejected": -2.2445883750915527,
      "logps/chosen": -116.91619873046875,
      "logps/rejected": -181.08287048339844,
      "loss": 0.0348,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.4316792488098145,
      "rewards/margins": 4.899362087249756,
      "rewards/rejected": -9.33104133605957,
      "step": 520
    },
    {
      "epoch": 17.890295358649787,
      "grad_norm": 4.2693352699279785,
      "learning_rate": 1.1233862220001168e-07,
      "logits/chosen": -2.172609806060791,
      "logits/rejected": -2.196518898010254,
      "logps/chosen": -117.672119140625,
      "logps/rejected": -155.81735229492188,
      "loss": 0.0494,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -3.591332197189331,
      "rewards/margins": 4.197774410247803,
      "rewards/rejected": -7.789106845855713,
      "step": 530
    },
    {
      "epoch": 18.227848101265824,
      "grad_norm": 3.4541165828704834,
      "learning_rate": 7.209253860320897e-08,
      "logits/chosen": -2.206496000289917,
      "logits/rejected": -2.2164080142974854,
      "logps/chosen": -114.9886474609375,
      "logps/rejected": -154.38595581054688,
      "loss": 0.0396,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.102456092834473,
      "rewards/margins": 4.425411701202393,
      "rewards/rejected": -8.527867317199707,
      "step": 540
    },
    {
      "epoch": 18.565400843881857,
      "grad_norm": 3.7231645584106445,
      "learning_rate": 4.063789016999331e-08,
      "logits/chosen": -2.2224667072296143,
      "logits/rejected": -2.248166561126709,
      "logps/chosen": -100.99537658691406,
      "logps/rejected": -148.0446319580078,
      "loss": 0.0473,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.5166335105895996,
      "rewards/margins": 4.642723560333252,
      "rewards/rejected": -8.159357070922852,
      "step": 550
    },
    {
      "epoch": 18.90295358649789,
      "grad_norm": 3.382079601287842,
      "learning_rate": 1.808857395232788e-08,
      "logits/chosen": -2.1830036640167236,
      "logits/rejected": -2.205293893814087,
      "logps/chosen": -110.93768310546875,
      "logps/rejected": -149.43922424316406,
      "loss": 0.0378,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.975515842437744,
      "rewards/margins": 4.391709327697754,
      "rewards/rejected": -8.36722469329834,
      "step": 560
    },
    {
      "epoch": 19.240506329113924,
      "grad_norm": 3.477522850036621,
      "learning_rate": 4.526240859345499e-09,
      "logits/chosen": -2.1929779052734375,
      "logits/rejected": -2.1888599395751953,
      "logps/chosen": -112.3604507446289,
      "logps/rejected": -159.12904357910156,
      "loss": 0.0428,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.224286079406738,
      "rewards/margins": 4.6413350105285645,
      "rewards/rejected": -8.865621566772461,
      "step": 570
    },
    {
      "epoch": 19.578059071729957,
      "grad_norm": 2.8677260875701904,
      "learning_rate": 0.0,
      "logits/chosen": -2.2252745628356934,
      "logits/rejected": -2.255455732345581,
      "logps/chosen": -98.1314468383789,
      "logps/rejected": -156.11318969726562,
      "loss": 0.041,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.2994728088378906,
      "rewards/margins": 4.715044975280762,
      "rewards/rejected": -8.014516830444336,
      "step": 580
    },
    {
      "epoch": 19.578059071729957,
      "step": 580,
      "total_flos": 2.1485149125974426e+18,
      "train_loss": 0.2753299895545532,
      "train_runtime": 4556.676,
      "train_samples_per_second": 8.322,
      "train_steps_per_second": 0.127
    }
  ],
  "logging_steps": 10,
  "max_steps": 580,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1485149125974426e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}