| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 100, | |
| "global_step": 2495, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0004008016032064128, | |
| "grad_norm": 3947236.9376629265, | |
| "learning_rate": 2e-09, | |
| "logits/chosen": -2.5464653968811035, | |
| "logits/rejected": -2.4981484413146973, | |
| "logps/chosen": -136.25015258789062, | |
| "logps/rejected": -109.48806762695312, | |
| "loss": 125090.2344, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.004008016032064128, | |
| "grad_norm": 4131433.2054548617, | |
| "learning_rate": 2e-08, | |
| "logits/chosen": -2.444033145904541, | |
| "logits/rejected": -2.4536919593811035, | |
| "logps/chosen": -96.72305297851562, | |
| "logps/rejected": -102.78682708740234, | |
| "loss": 128262.9167, | |
| "rewards/accuracies": 0.4861111044883728, | |
| "rewards/chosen": 1.4120871128397994e-05, | |
| "rewards/margins": 5.588051863014698e-06, | |
| "rewards/rejected": 8.532813808415085e-06, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.008016032064128256, | |
| "grad_norm": 4082913.592970218, | |
| "learning_rate": 4e-08, | |
| "logits/chosen": -2.470759153366089, | |
| "logits/rejected": -2.4879543781280518, | |
| "logps/chosen": -82.20399475097656, | |
| "logps/rejected": -95.1635513305664, | |
| "loss": 128748.0, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.00010425537766423076, | |
| "rewards/margins": -2.428081279504113e-05, | |
| "rewards/rejected": -7.997456850716844e-05, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.012024048096192385, | |
| "grad_norm": 3699942.5510203396, | |
| "learning_rate": 6e-08, | |
| "logits/chosen": -2.3510866165161133, | |
| "logits/rejected": -2.3375275135040283, | |
| "logps/chosen": -90.31131744384766, | |
| "logps/rejected": -91.30790710449219, | |
| "loss": 128316.2875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -8.024895214475691e-05, | |
| "rewards/margins": 9.57045704126358e-05, | |
| "rewards/rejected": -0.00017595352255739272, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01603206412825651, | |
| "grad_norm": 3526899.1699538147, | |
| "learning_rate": 8e-08, | |
| "logits/chosen": -2.4227395057678223, | |
| "logits/rejected": -2.427928924560547, | |
| "logps/chosen": -74.48422241210938, | |
| "logps/rejected": -77.61112213134766, | |
| "loss": 126226.3125, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.00019940172205679119, | |
| "rewards/margins": -5.970364873064682e-05, | |
| "rewards/rejected": -0.00013969806605018675, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02004008016032064, | |
| "grad_norm": 4491084.202014744, | |
| "learning_rate": 1e-07, | |
| "logits/chosen": -2.4444451332092285, | |
| "logits/rejected": -2.4303643703460693, | |
| "logps/chosen": -82.40409088134766, | |
| "logps/rejected": -85.80543518066406, | |
| "loss": 129160.9375, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.00018717416969593614, | |
| "rewards/margins": 0.00014039597590453923, | |
| "rewards/rejected": -0.0003275701601523906, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02404809619238477, | |
| "grad_norm": 3403622.9497329933, | |
| "learning_rate": 1.2e-07, | |
| "logits/chosen": -2.4172229766845703, | |
| "logits/rejected": -2.395040988922119, | |
| "logps/chosen": -93.23040008544922, | |
| "logps/rejected": -97.07014465332031, | |
| "loss": 125728.05, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.0002615585399325937, | |
| "rewards/margins": -0.0001315469853579998, | |
| "rewards/rejected": -0.00013001154002267867, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.028056112224448898, | |
| "grad_norm": 4044511.014833911, | |
| "learning_rate": 1.4e-07, | |
| "logits/chosen": -2.3989176750183105, | |
| "logits/rejected": -2.3830370903015137, | |
| "logps/chosen": -101.8390121459961, | |
| "logps/rejected": -108.08101654052734, | |
| "loss": 130639.9625, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.00044045006507076323, | |
| "rewards/margins": -9.915141708916053e-05, | |
| "rewards/rejected": -0.00034129866980947554, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.03206412825651302, | |
| "grad_norm": 5583101.963034321, | |
| "learning_rate": 1.6e-07, | |
| "logits/chosen": -2.4113218784332275, | |
| "logits/rejected": -2.394385814666748, | |
| "logps/chosen": -93.59923553466797, | |
| "logps/rejected": -96.75505828857422, | |
| "loss": 133003.7125, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.00045990836224518716, | |
| "rewards/margins": 0.0003325659781694412, | |
| "rewards/rejected": -0.0007924743695184588, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.036072144288577156, | |
| "grad_norm": 6062402.798193364, | |
| "learning_rate": 1.8e-07, | |
| "logits/chosen": -2.2995922565460205, | |
| "logits/rejected": -2.2753472328186035, | |
| "logps/chosen": -83.42291259765625, | |
| "logps/rejected": -92.72061920166016, | |
| "loss": 130032.6125, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.001521119149401784, | |
| "rewards/margins": 0.0012046361807733774, | |
| "rewards/rejected": -0.0027257553301751614, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04008016032064128, | |
| "grad_norm": 6312836.0228954, | |
| "learning_rate": 2e-07, | |
| "logits/chosen": -2.2923099994659424, | |
| "logits/rejected": -2.303053379058838, | |
| "logps/chosen": -107.588134765625, | |
| "logps/rejected": -121.3271484375, | |
| "loss": 129288.9125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.009896589443087578, | |
| "rewards/margins": 0.0033134943805634975, | |
| "rewards/rejected": -0.013210085220634937, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04408817635270541, | |
| "grad_norm": 5590976.547776195, | |
| "learning_rate": 2.1999999999999998e-07, | |
| "logits/chosen": -2.3039541244506836, | |
| "logits/rejected": -2.3309550285339355, | |
| "logps/chosen": -112.95283508300781, | |
| "logps/rejected": -126.42842102050781, | |
| "loss": 128575.45, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.012949028983712196, | |
| "rewards/margins": 0.0049244253896176815, | |
| "rewards/rejected": -0.017873454838991165, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04809619238476954, | |
| "grad_norm": 9019678.387801899, | |
| "learning_rate": 2.4e-07, | |
| "logits/chosen": -2.1888508796691895, | |
| "logits/rejected": -2.189389705657959, | |
| "logps/chosen": -112.77528381347656, | |
| "logps/rejected": -131.09449768066406, | |
| "loss": 124154.3125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02229696698486805, | |
| "rewards/margins": 0.01256654690951109, | |
| "rewards/rejected": -0.034863512963056564, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.052104208416833664, | |
| "grad_norm": 7017232.640334902, | |
| "learning_rate": 2.6e-07, | |
| "logits/chosen": -2.3326258659362793, | |
| "logits/rejected": -2.3331451416015625, | |
| "logps/chosen": -103.95518493652344, | |
| "logps/rejected": -114.6365966796875, | |
| "loss": 127130.725, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.01884140633046627, | |
| "rewards/margins": 0.0011669672094285488, | |
| "rewards/rejected": -0.020008374005556107, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.056112224448897796, | |
| "grad_norm": 8047688.165085967, | |
| "learning_rate": 2.8e-07, | |
| "logits/chosen": -2.2432637214660645, | |
| "logits/rejected": -2.2273428440093994, | |
| "logps/chosen": -100.75127410888672, | |
| "logps/rejected": -108.84329986572266, | |
| "loss": 125544.7375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.019932765513658524, | |
| "rewards/margins": 0.006595449987798929, | |
| "rewards/rejected": -0.026528215035796165, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.06012024048096192, | |
| "grad_norm": 13389878.80906382, | |
| "learning_rate": 3e-07, | |
| "logits/chosen": -2.338097095489502, | |
| "logits/rejected": -2.334582805633545, | |
| "logps/chosen": -98.87701416015625, | |
| "logps/rejected": -128.06649780273438, | |
| "loss": 126277.5125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.01577303186058998, | |
| "rewards/margins": 0.00698325177654624, | |
| "rewards/rejected": -0.022756287828087807, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06412825651302605, | |
| "grad_norm": 6547486.513058976, | |
| "learning_rate": 3.2e-07, | |
| "logits/chosen": -2.2872040271759033, | |
| "logits/rejected": -2.318220615386963, | |
| "logps/chosen": -107.16642761230469, | |
| "logps/rejected": -128.63902282714844, | |
| "loss": 130629.6125, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.009940323419868946, | |
| "rewards/margins": 0.004813443869352341, | |
| "rewards/rejected": -0.014753768220543861, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06813627254509018, | |
| "grad_norm": 6238269.898134831, | |
| "learning_rate": 3.4000000000000003e-07, | |
| "logits/chosen": -2.283688545227051, | |
| "logits/rejected": -2.269543170928955, | |
| "logps/chosen": -112.38804626464844, | |
| "logps/rejected": -124.51107025146484, | |
| "loss": 129337.075, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.018186267465353012, | |
| "rewards/margins": 0.010698455385863781, | |
| "rewards/rejected": -0.028884723782539368, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.07214428857715431, | |
| "grad_norm": 5333508.312286028, | |
| "learning_rate": 3.6e-07, | |
| "logits/chosen": -2.440035104751587, | |
| "logits/rejected": -2.416351318359375, | |
| "logps/chosen": -115.54080963134766, | |
| "logps/rejected": -128.35433959960938, | |
| "loss": 125903.175, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.01606156677007675, | |
| "rewards/margins": 0.0027716129552572966, | |
| "rewards/rejected": -0.018833179026842117, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07615230460921844, | |
| "grad_norm": 6363941.004816477, | |
| "learning_rate": 3.7999999999999996e-07, | |
| "logits/chosen": -2.302215337753296, | |
| "logits/rejected": -2.316080093383789, | |
| "logps/chosen": -95.27733612060547, | |
| "logps/rejected": -109.7214126586914, | |
| "loss": 128074.3375, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.013280262239277363, | |
| "rewards/margins": 0.004476197995245457, | |
| "rewards/rejected": -0.01775646023452282, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.08016032064128256, | |
| "grad_norm": 6604467.189655725, | |
| "learning_rate": 4e-07, | |
| "logits/chosen": -2.4023125171661377, | |
| "logits/rejected": -2.403869152069092, | |
| "logps/chosen": -120.79121398925781, | |
| "logps/rejected": -123.46217346191406, | |
| "loss": 130248.2, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.009212212637066841, | |
| "rewards/margins": 0.0011829538270831108, | |
| "rewards/rejected": -0.010395165532827377, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0841683366733467, | |
| "grad_norm": 6023064.099431328, | |
| "learning_rate": 4.1999999999999995e-07, | |
| "logits/chosen": -2.456587553024292, | |
| "logits/rejected": -2.45320725440979, | |
| "logps/chosen": -97.8330307006836, | |
| "logps/rejected": -110.1967544555664, | |
| "loss": 132337.3375, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.010994483716785908, | |
| "rewards/margins": 0.003299609525129199, | |
| "rewards/rejected": -0.01429409347474575, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08817635270541083, | |
| "grad_norm": 6070060.076923608, | |
| "learning_rate": 4.3999999999999997e-07, | |
| "logits/chosen": -2.464625835418701, | |
| "logits/rejected": -2.4630608558654785, | |
| "logps/chosen": -101.9610366821289, | |
| "logps/rejected": -116.52901458740234, | |
| "loss": 122057.3875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.015559064224362373, | |
| "rewards/margins": 0.006916286889463663, | |
| "rewards/rejected": -0.02247535064816475, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.09218436873747494, | |
| "grad_norm": 6531050.132289726, | |
| "learning_rate": 4.6e-07, | |
| "logits/chosen": -2.5064964294433594, | |
| "logits/rejected": -2.4663119316101074, | |
| "logps/chosen": -113.6077880859375, | |
| "logps/rejected": -136.72740173339844, | |
| "loss": 126837.725, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02166624739766121, | |
| "rewards/margins": 0.015731699764728546, | |
| "rewards/rejected": -0.03739794343709946, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09619238476953908, | |
| "grad_norm": 8249199.68586373, | |
| "learning_rate": 4.8e-07, | |
| "logits/chosen": -2.4191393852233887, | |
| "logits/rejected": -2.4175992012023926, | |
| "logps/chosen": -140.10438537597656, | |
| "logps/rejected": -161.83901977539062, | |
| "loss": 127447.7375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.024259308353066444, | |
| "rewards/margins": 0.015592202544212341, | |
| "rewards/rejected": -0.039851509034633636, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.10020040080160321, | |
| "grad_norm": 6273689.257003604, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -2.4710445404052734, | |
| "logits/rejected": -2.49545955657959, | |
| "logps/chosen": -125.069091796875, | |
| "logps/rejected": -141.25308227539062, | |
| "loss": 127510.825, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02523133158683777, | |
| "rewards/margins": 0.014551195316016674, | |
| "rewards/rejected": -0.03978252038359642, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.10420841683366733, | |
| "grad_norm": 4863381.252711604, | |
| "learning_rate": 4.97772828507795e-07, | |
| "logits/chosen": -2.508707046508789, | |
| "logits/rejected": -2.5528035163879395, | |
| "logps/chosen": -109.48052978515625, | |
| "logps/rejected": -121.25135803222656, | |
| "loss": 127932.3875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.008669688366353512, | |
| "rewards/margins": 0.0026229789946228266, | |
| "rewards/rejected": -0.01129266805946827, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10821643286573146, | |
| "grad_norm": 7439978.719265488, | |
| "learning_rate": 4.955456570155902e-07, | |
| "logits/chosen": -2.661339282989502, | |
| "logits/rejected": -2.612370729446411, | |
| "logps/chosen": -104.54673767089844, | |
| "logps/rejected": -126.21573638916016, | |
| "loss": 129504.125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.013199085369706154, | |
| "rewards/margins": 0.013015885837376118, | |
| "rewards/rejected": -0.026214972138404846, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.11222444889779559, | |
| "grad_norm": 5652432.617099802, | |
| "learning_rate": 4.933184855233853e-07, | |
| "logits/chosen": -2.657796621322632, | |
| "logits/rejected": -2.6554348468780518, | |
| "logps/chosen": -120.7027587890625, | |
| "logps/rejected": -123.74530029296875, | |
| "loss": 133293.2, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.019954059273004532, | |
| "rewards/margins": 0.002449373248964548, | |
| "rewards/rejected": -0.022403430193662643, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11623246492985972, | |
| "grad_norm": 8254991.879081396, | |
| "learning_rate": 4.910913140311803e-07, | |
| "logits/chosen": -2.7399675846099854, | |
| "logits/rejected": -2.7726333141326904, | |
| "logps/chosen": -100.21595001220703, | |
| "logps/rejected": -131.60617065429688, | |
| "loss": 123595.3, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.015642058104276657, | |
| "rewards/margins": 0.015260448679327965, | |
| "rewards/rejected": -0.030902501195669174, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.12024048096192384, | |
| "grad_norm": 8927344.576164661, | |
| "learning_rate": 4.888641425389755e-07, | |
| "logits/chosen": -2.6996548175811768, | |
| "logits/rejected": -2.74585223197937, | |
| "logps/chosen": -113.76595306396484, | |
| "logps/rejected": -144.14627075195312, | |
| "loss": 126853.8875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02661210298538208, | |
| "rewards/margins": 0.016480224207043648, | |
| "rewards/rejected": -0.04309232532978058, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.12424849699398798, | |
| "grad_norm": 6996156.236412326, | |
| "learning_rate": 4.866369710467706e-07, | |
| "logits/chosen": -2.6147875785827637, | |
| "logits/rejected": -2.6017518043518066, | |
| "logps/chosen": -103.03385925292969, | |
| "logps/rejected": -116.8309097290039, | |
| "loss": 128838.425, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.0185169018805027, | |
| "rewards/margins": 0.004754557274281979, | |
| "rewards/rejected": -0.023271460086107254, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.1282565130260521, | |
| "grad_norm": 7524659.80788171, | |
| "learning_rate": 4.844097995545656e-07, | |
| "logits/chosen": -2.696021556854248, | |
| "logits/rejected": -2.6883420944213867, | |
| "logps/chosen": -122.92547607421875, | |
| "logps/rejected": -142.40811157226562, | |
| "loss": 122181.3125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.015210810117423534, | |
| "rewards/margins": 0.01843477226793766, | |
| "rewards/rejected": -0.03364557772874832, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.13226452905811623, | |
| "grad_norm": 6072329.149131962, | |
| "learning_rate": 4.821826280623608e-07, | |
| "logits/chosen": -2.4829201698303223, | |
| "logits/rejected": -2.50555419921875, | |
| "logps/chosen": -117.76029968261719, | |
| "logps/rejected": -145.81637573242188, | |
| "loss": 124020.45, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.020629018545150757, | |
| "rewards/margins": 0.017869364470243454, | |
| "rewards/rejected": -0.03849838301539421, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.13627254509018036, | |
| "grad_norm": 8789561.131088747, | |
| "learning_rate": 4.799554565701559e-07, | |
| "logits/chosen": -2.572274684906006, | |
| "logits/rejected": -2.599792957305908, | |
| "logps/chosen": -102.4592514038086, | |
| "logps/rejected": -136.51661682128906, | |
| "loss": 117515.9625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.024631744250655174, | |
| "rewards/margins": 0.019154489040374756, | |
| "rewards/rejected": -0.04378623515367508, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.1402805611222445, | |
| "grad_norm": 4632119.649847494, | |
| "learning_rate": 4.77728285077951e-07, | |
| "logits/chosen": -2.4988906383514404, | |
| "logits/rejected": -2.4986491203308105, | |
| "logps/chosen": -109.21330261230469, | |
| "logps/rejected": -121.016357421875, | |
| "loss": 126712.85, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.0240558423101902, | |
| "rewards/margins": 0.011534234508872032, | |
| "rewards/rejected": -0.035590074956417084, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.14428857715430862, | |
| "grad_norm": 6705809.181653678, | |
| "learning_rate": 4.7550111358574605e-07, | |
| "logits/chosen": -2.439885377883911, | |
| "logits/rejected": -2.488706111907959, | |
| "logps/chosen": -121.1862564086914, | |
| "logps/rejected": -154.49058532714844, | |
| "loss": 130547.825, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02486741915345192, | |
| "rewards/margins": 0.023590799421072006, | |
| "rewards/rejected": -0.04845822602510452, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14829659318637275, | |
| "grad_norm": 9372987.582902173, | |
| "learning_rate": 4.7327394209354114e-07, | |
| "logits/chosen": -2.326481819152832, | |
| "logits/rejected": -2.269331693649292, | |
| "logps/chosen": -128.03793334960938, | |
| "logps/rejected": -144.99484252929688, | |
| "loss": 132171.75, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.028904268518090248, | |
| "rewards/margins": 0.015704263001680374, | |
| "rewards/rejected": -0.04460852965712547, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1523046092184369, | |
| "grad_norm": 7264491.777537584, | |
| "learning_rate": 4.710467706013363e-07, | |
| "logits/chosen": -2.2833657264709473, | |
| "logits/rejected": -2.276210308074951, | |
| "logps/chosen": -104.51480865478516, | |
| "logps/rejected": -115.93243408203125, | |
| "loss": 124968.1875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.01820625737309456, | |
| "rewards/margins": 0.008318779990077019, | |
| "rewards/rejected": -0.026525039225816727, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.156312625250501, | |
| "grad_norm": 9483300.54549026, | |
| "learning_rate": 4.6881959910913137e-07, | |
| "logits/chosen": -2.3335509300231934, | |
| "logits/rejected": -2.3318705558776855, | |
| "logps/chosen": -118.47274017333984, | |
| "logps/rejected": -159.32736206054688, | |
| "loss": 124226.0375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.024884693324565887, | |
| "rewards/margins": 0.03282006457448006, | |
| "rewards/rejected": -0.057704757899045944, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.16032064128256512, | |
| "grad_norm": 5705393.312794033, | |
| "learning_rate": 4.6659242761692646e-07, | |
| "logits/chosen": -2.1676976680755615, | |
| "logits/rejected": -2.2024545669555664, | |
| "logps/chosen": -128.0613250732422, | |
| "logps/rejected": -154.72410583496094, | |
| "loss": 131599.275, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.03110039234161377, | |
| "rewards/margins": 0.020628096535801888, | |
| "rewards/rejected": -0.05172848701477051, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.16432865731462926, | |
| "grad_norm": 7429207.687017749, | |
| "learning_rate": 4.643652561247216e-07, | |
| "logits/chosen": -2.377800464630127, | |
| "logits/rejected": -2.3630149364471436, | |
| "logps/chosen": -114.31864929199219, | |
| "logps/rejected": -124.80494689941406, | |
| "loss": 129026.4125, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.022269196808338165, | |
| "rewards/margins": 0.007764645852148533, | |
| "rewards/rejected": -0.030033841729164124, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1683366733466934, | |
| "grad_norm": 6398357.85123705, | |
| "learning_rate": 4.621380846325167e-07, | |
| "logits/chosen": -2.467796802520752, | |
| "logits/rejected": -2.486076831817627, | |
| "logps/chosen": -117.53851318359375, | |
| "logps/rejected": -140.3090362548828, | |
| "loss": 124892.1125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.01722443476319313, | |
| "rewards/margins": 0.01843501813709736, | |
| "rewards/rejected": -0.03565945476293564, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.17234468937875752, | |
| "grad_norm": 6277691.218109459, | |
| "learning_rate": 4.5991091314031177e-07, | |
| "logits/chosen": -2.3002543449401855, | |
| "logits/rejected": -2.2977206707000732, | |
| "logps/chosen": -104.3665771484375, | |
| "logps/rejected": -117.7842788696289, | |
| "loss": 127817.725, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.017184479162096977, | |
| "rewards/margins": 0.005776461213827133, | |
| "rewards/rejected": -0.02296094223856926, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.17635270541082165, | |
| "grad_norm": 5524654.29238764, | |
| "learning_rate": 4.5768374164810686e-07, | |
| "logits/chosen": -2.3842499256134033, | |
| "logits/rejected": -2.3653392791748047, | |
| "logps/chosen": -106.9764633178711, | |
| "logps/rejected": -128.93316650390625, | |
| "loss": 125052.9375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.01907220296561718, | |
| "rewards/margins": 0.017310332506895065, | |
| "rewards/rejected": -0.036382537335157394, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.18036072144288579, | |
| "grad_norm": 5698111.597390376, | |
| "learning_rate": 4.55456570155902e-07, | |
| "logits/chosen": -2.4640183448791504, | |
| "logits/rejected": -2.4729461669921875, | |
| "logps/chosen": -116.1183090209961, | |
| "logps/rejected": -126.81109619140625, | |
| "loss": 125015.2, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.02313784509897232, | |
| "rewards/margins": 0.007788621820509434, | |
| "rewards/rejected": -0.03092646598815918, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1843687374749499, | |
| "grad_norm": 8646711.65753647, | |
| "learning_rate": 4.532293986636971e-07, | |
| "logits/chosen": -2.2500667572021484, | |
| "logits/rejected": -2.2651727199554443, | |
| "logps/chosen": -130.732666015625, | |
| "logps/rejected": -146.08314514160156, | |
| "loss": 128072.8125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.0223550908267498, | |
| "rewards/margins": 0.015680748969316483, | |
| "rewards/rejected": -0.03803584352135658, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.18837675350701402, | |
| "grad_norm": 7633681.996131759, | |
| "learning_rate": 4.510022271714922e-07, | |
| "logits/chosen": -2.5176281929016113, | |
| "logits/rejected": -2.5100059509277344, | |
| "logps/chosen": -120.24742126464844, | |
| "logps/rejected": -144.9837188720703, | |
| "loss": 128354.825, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02441255934536457, | |
| "rewards/margins": 0.01506769098341465, | |
| "rewards/rejected": -0.03948025032877922, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.19238476953907815, | |
| "grad_norm": 5929632.8733501285, | |
| "learning_rate": 4.487750556792873e-07, | |
| "logits/chosen": -2.497260093688965, | |
| "logits/rejected": -2.4669649600982666, | |
| "logps/chosen": -129.90042114257812, | |
| "logps/rejected": -138.57948303222656, | |
| "loss": 126991.15, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.02791530452668667, | |
| "rewards/margins": 0.007056623697280884, | |
| "rewards/rejected": -0.0349719300866127, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1963927855711423, | |
| "grad_norm": 7507688.616841515, | |
| "learning_rate": 4.465478841870824e-07, | |
| "logits/chosen": -2.4561104774475098, | |
| "logits/rejected": -2.406879186630249, | |
| "logps/chosen": -105.32928466796875, | |
| "logps/rejected": -111.3759765625, | |
| "loss": 133037.95, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02306142821907997, | |
| "rewards/margins": 0.006820513866841793, | |
| "rewards/rejected": -0.02988194301724434, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.20040080160320642, | |
| "grad_norm": 6114303.37476775, | |
| "learning_rate": 4.443207126948775e-07, | |
| "logits/chosen": -2.5643982887268066, | |
| "logits/rejected": -2.5802197456359863, | |
| "logps/chosen": -113.5167236328125, | |
| "logps/rejected": -129.633544921875, | |
| "loss": 126710.2375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.02158009074628353, | |
| "rewards/margins": 0.008723837323486805, | |
| "rewards/rejected": -0.030303925275802612, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.20440881763527055, | |
| "grad_norm": 5739769.080992031, | |
| "learning_rate": 4.420935412026726e-07, | |
| "logits/chosen": -2.506775379180908, | |
| "logits/rejected": -2.5049405097961426, | |
| "logps/chosen": -113.4487075805664, | |
| "logps/rejected": -121.42533874511719, | |
| "loss": 128230.775, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.023860003799200058, | |
| "rewards/margins": 0.004126023501157761, | |
| "rewards/rejected": -0.027986029163002968, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.20841683366733466, | |
| "grad_norm": 8712683.142514465, | |
| "learning_rate": 4.398663697104677e-07, | |
| "logits/chosen": -2.2912344932556152, | |
| "logits/rejected": -2.2889084815979004, | |
| "logps/chosen": -118.3155746459961, | |
| "logps/rejected": -136.2932891845703, | |
| "loss": 125754.4375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02143458090722561, | |
| "rewards/margins": 0.01660776697099209, | |
| "rewards/rejected": -0.0380423478782177, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2124248496993988, | |
| "grad_norm": 5141990.524270266, | |
| "learning_rate": 4.376391982182628e-07, | |
| "logits/chosen": -2.2442755699157715, | |
| "logits/rejected": -2.258594274520874, | |
| "logps/chosen": -119.4423599243164, | |
| "logps/rejected": -132.0382080078125, | |
| "loss": 126731.7625, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02435903809964657, | |
| "rewards/margins": 0.011756391264498234, | |
| "rewards/rejected": -0.03611543029546738, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.21643286573146292, | |
| "grad_norm": 7685918.890454359, | |
| "learning_rate": 4.3541202672605785e-07, | |
| "logits/chosen": -2.4173481464385986, | |
| "logits/rejected": -2.4380507469177246, | |
| "logps/chosen": -118.99906158447266, | |
| "logps/rejected": -138.0341339111328, | |
| "loss": 125104.65, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02268015593290329, | |
| "rewards/margins": 0.014481584541499615, | |
| "rewards/rejected": -0.03716174140572548, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.22044088176352705, | |
| "grad_norm": 6743097.627817877, | |
| "learning_rate": 4.33184855233853e-07, | |
| "logits/chosen": -2.3929710388183594, | |
| "logits/rejected": -2.3947911262512207, | |
| "logps/chosen": -139.71694946289062, | |
| "logps/rejected": -141.9007110595703, | |
| "loss": 130289.3125, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.028682414442300797, | |
| "rewards/margins": 0.004017618950456381, | |
| "rewards/rejected": -0.032700031995773315, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.22444889779559118, | |
| "grad_norm": 7880234.192936395, | |
| "learning_rate": 4.309576837416481e-07, | |
| "logits/chosen": -2.3357808589935303, | |
| "logits/rejected": -2.3500537872314453, | |
| "logps/chosen": -99.57920837402344, | |
| "logps/rejected": -132.2980499267578, | |
| "loss": 123243.8375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.020820502191781998, | |
| "rewards/margins": 0.028303777799010277, | |
| "rewards/rejected": -0.04912428557872772, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.22845691382765532, | |
| "grad_norm": 5626858.656154921, | |
| "learning_rate": 4.2873051224944316e-07, | |
| "logits/chosen": -2.4428811073303223, | |
| "logits/rejected": -2.437586784362793, | |
| "logps/chosen": -108.9743881225586, | |
| "logps/rejected": -138.54994201660156, | |
| "loss": 126263.475, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.019286539405584335, | |
| "rewards/margins": 0.02130548655986786, | |
| "rewards/rejected": -0.04059202969074249, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.23246492985971945, | |
| "grad_norm": 5580269.332486439, | |
| "learning_rate": 4.2650334075723825e-07, | |
| "logits/chosen": -2.4557948112487793, | |
| "logits/rejected": -2.452768564224243, | |
| "logps/chosen": -116.21510314941406, | |
| "logps/rejected": -130.24020385742188, | |
| "loss": 123232.45, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.025895819067955017, | |
| "rewards/margins": 0.007201328873634338, | |
| "rewards/rejected": -0.033097147941589355, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.23647294589178355, | |
| "grad_norm": 4797099.664206871, | |
| "learning_rate": 4.242761692650334e-07, | |
| "logits/chosen": -2.418750762939453, | |
| "logits/rejected": -2.4155123233795166, | |
| "logps/chosen": -118.62259674072266, | |
| "logps/rejected": -143.041015625, | |
| "loss": 128013.3125, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.02427416667342186, | |
| "rewards/margins": 0.013805478811264038, | |
| "rewards/rejected": -0.0380796417593956, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.24048096192384769, | |
| "grad_norm": 4972883.224938656, | |
| "learning_rate": 4.220489977728285e-07, | |
| "logits/chosen": -2.3690855503082275, | |
| "logits/rejected": -2.3753814697265625, | |
| "logps/chosen": -106.62162017822266, | |
| "logps/rejected": -125.63492584228516, | |
| "loss": 129292.35, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.023660266771912575, | |
| "rewards/margins": 0.00939355418086052, | |
| "rewards/rejected": -0.03305382281541824, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.24448897795591182, | |
| "grad_norm": 6565230.079037476, | |
| "learning_rate": 4.1982182628062357e-07, | |
| "logits/chosen": -2.433472156524658, | |
| "logits/rejected": -2.440901279449463, | |
| "logps/chosen": -100.97681427001953, | |
| "logps/rejected": -121.0699234008789, | |
| "loss": 125732.875, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.021025976166129112, | |
| "rewards/margins": 0.012437298893928528, | |
| "rewards/rejected": -0.03346327692270279, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.24849699398797595, | |
| "grad_norm": 5435060.714939647, | |
| "learning_rate": 4.175946547884187e-07, | |
| "logits/chosen": -2.4333367347717285, | |
| "logits/rejected": -2.4552102088928223, | |
| "logps/chosen": -123.1323013305664, | |
| "logps/rejected": -142.62408447265625, | |
| "loss": 124078.4875, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.025617394596338272, | |
| "rewards/margins": 0.008992002345621586, | |
| "rewards/rejected": -0.03460939601063728, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.25250501002004005, | |
| "grad_norm": 6547015.310004752, | |
| "learning_rate": 4.153674832962138e-07, | |
| "logits/chosen": -2.4262237548828125, | |
| "logits/rejected": -2.444304943084717, | |
| "logps/chosen": -112.52055358886719, | |
| "logps/rejected": -144.73129272460938, | |
| "loss": 126401.4375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02900712564587593, | |
| "rewards/margins": 0.020192446187138557, | |
| "rewards/rejected": -0.04919956251978874, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2565130260521042, | |
| "grad_norm": 6237814.549776749, | |
| "learning_rate": 4.131403118040089e-07, | |
| "logits/chosen": -2.631423234939575, | |
| "logits/rejected": -2.640061140060425, | |
| "logps/chosen": -123.40995788574219, | |
| "logps/rejected": -139.66795349121094, | |
| "loss": 125416.05, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.020980000495910645, | |
| "rewards/margins": 0.02331816963851452, | |
| "rewards/rejected": -0.04429817199707031, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.2605210420841683, | |
| "grad_norm": 3386484.505424407, | |
| "learning_rate": 4.1091314031180397e-07, | |
| "logits/chosen": -2.5698630809783936, | |
| "logits/rejected": -2.5268971920013428, | |
| "logps/chosen": -115.55632019042969, | |
| "logps/rejected": -129.51779174804688, | |
| "loss": 126294.025, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.025962088257074356, | |
| "rewards/margins": 0.012019636109471321, | |
| "rewards/rejected": -0.03798172250390053, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.26452905811623245, | |
| "grad_norm": 5974701.756903167, | |
| "learning_rate": 4.086859688195991e-07, | |
| "logits/chosen": -2.542069673538208, | |
| "logits/rejected": -2.558957815170288, | |
| "logps/chosen": -128.98672485351562, | |
| "logps/rejected": -164.79867553710938, | |
| "loss": 123138.7625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.025483956560492516, | |
| "rewards/margins": 0.02371075749397278, | |
| "rewards/rejected": -0.04919471591711044, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.2685370741482966, | |
| "grad_norm": 8618946.302311558, | |
| "learning_rate": 4.064587973273942e-07, | |
| "logits/chosen": -2.5645318031311035, | |
| "logits/rejected": -2.5552051067352295, | |
| "logps/chosen": -105.97404479980469, | |
| "logps/rejected": -128.65032958984375, | |
| "loss": 125055.6, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.029579663649201393, | |
| "rewards/margins": 0.009542147628962994, | |
| "rewards/rejected": -0.03912181407213211, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.2725450901803607, | |
| "grad_norm": 6026442.863047762, | |
| "learning_rate": 4.042316258351893e-07, | |
| "logits/chosen": -2.4789493083953857, | |
| "logits/rejected": -2.4921040534973145, | |
| "logps/chosen": -125.647705078125, | |
| "logps/rejected": -129.52207946777344, | |
| "loss": 124416.45, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.02354869619011879, | |
| "rewards/margins": 0.010550996288657188, | |
| "rewards/rejected": -0.03409969061613083, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.27655310621242485, | |
| "grad_norm": 6693842.633577265, | |
| "learning_rate": 4.0200445434298443e-07, | |
| "logits/chosen": -2.3823628425598145, | |
| "logits/rejected": -2.3753132820129395, | |
| "logps/chosen": -126.91679382324219, | |
| "logps/rejected": -137.48599243164062, | |
| "loss": 125631.625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.029714182019233704, | |
| "rewards/margins": 0.013311244547367096, | |
| "rewards/rejected": -0.0430254265666008, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.280561122244489, | |
| "grad_norm": 6734657.314157365, | |
| "learning_rate": 3.997772828507795e-07, | |
| "logits/chosen": -2.5848867893218994, | |
| "logits/rejected": -2.5814270973205566, | |
| "logps/chosen": -106.8030776977539, | |
| "logps/rejected": -145.386962890625, | |
| "loss": 120258.175, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.028242800384759903, | |
| "rewards/margins": 0.025544622913002968, | |
| "rewards/rejected": -0.05378742143511772, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2845691382765531, | |
| "grad_norm": 6187383.542849222, | |
| "learning_rate": 3.975501113585746e-07, | |
| "logits/chosen": -2.4657511711120605, | |
| "logits/rejected": -2.4647703170776367, | |
| "logps/chosen": -141.2967071533203, | |
| "logps/rejected": -156.6244354248047, | |
| "loss": 126171.675, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.030644794926047325, | |
| "rewards/margins": 0.012586990371346474, | |
| "rewards/rejected": -0.0432317890226841, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.28857715430861725, | |
| "grad_norm": 7209217.2713718135, | |
| "learning_rate": 3.9532293986636975e-07, | |
| "logits/chosen": -2.5702593326568604, | |
| "logits/rejected": -2.580562114715576, | |
| "logps/chosen": -115.55619049072266, | |
| "logps/rejected": -149.7683868408203, | |
| "loss": 121775.1125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.030658628791570663, | |
| "rewards/margins": 0.024698719382286072, | |
| "rewards/rejected": -0.05535735562443733, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2925851703406814, | |
| "grad_norm": 7345760.592322324, | |
| "learning_rate": 3.930957683741648e-07, | |
| "logits/chosen": -2.4842042922973633, | |
| "logits/rejected": -2.519537925720215, | |
| "logps/chosen": -130.01364135742188, | |
| "logps/rejected": -145.00106811523438, | |
| "loss": 125944.025, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.031169170513749123, | |
| "rewards/margins": 0.012248598039150238, | |
| "rewards/rejected": -0.04341777041554451, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2965931863727455, | |
| "grad_norm": 7769041.8821440255, | |
| "learning_rate": 3.9086859688195987e-07, | |
| "logits/chosen": -2.4142649173736572, | |
| "logits/rejected": -2.3964760303497314, | |
| "logps/chosen": -110.42384338378906, | |
| "logps/rejected": -147.5558319091797, | |
| "loss": 128450.175, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03280683606863022, | |
| "rewards/margins": 0.02526194415986538, | |
| "rewards/rejected": -0.058068789541721344, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.30060120240480964, | |
| "grad_norm": 5837766.548604017, | |
| "learning_rate": 3.8864142538975496e-07, | |
| "logits/chosen": -2.490830421447754, | |
| "logits/rejected": -2.4848005771636963, | |
| "logps/chosen": -119.2578353881836, | |
| "logps/rejected": -136.1200714111328, | |
| "loss": 129370.05, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.027966167777776718, | |
| "rewards/margins": 0.01382518745958805, | |
| "rewards/rejected": -0.04179135337471962, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.3046092184368738, | |
| "grad_norm": 5653605.464138263, | |
| "learning_rate": 3.864142538975501e-07, | |
| "logits/chosen": -2.6354494094848633, | |
| "logits/rejected": -2.6398041248321533, | |
| "logps/chosen": -119.77166748046875, | |
| "logps/rejected": -135.96331787109375, | |
| "loss": 125964.9875, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.02680077590048313, | |
| "rewards/margins": 0.009917078539729118, | |
| "rewards/rejected": -0.03671785444021225, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.30861723446893785, | |
| "grad_norm": 6202101.053709776, | |
| "learning_rate": 3.841870824053452e-07, | |
| "logits/chosen": -2.656554698944092, | |
| "logits/rejected": -2.6409945487976074, | |
| "logps/chosen": -112.60661315917969, | |
| "logps/rejected": -128.2964630126953, | |
| "loss": 125094.25, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.0260836873203516, | |
| "rewards/margins": 0.011154638603329659, | |
| "rewards/rejected": -0.03723832219839096, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.312625250501002, | |
| "grad_norm": 6165732.580106268, | |
| "learning_rate": 3.819599109131403e-07, | |
| "logits/chosen": -2.7159509658813477, | |
| "logits/rejected": -2.7287096977233887, | |
| "logps/chosen": -102.42594909667969, | |
| "logps/rejected": -126.30348205566406, | |
| "loss": 129263.9, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.021063674241304398, | |
| "rewards/margins": 0.02199883759021759, | |
| "rewards/rejected": -0.043062515556812286, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.3166332665330661, | |
| "grad_norm": 6848518.542334836, | |
| "learning_rate": 3.797327394209354e-07, | |
| "logits/chosen": -2.675846576690674, | |
| "logits/rejected": -2.706200361251831, | |
| "logps/chosen": -118.5123062133789, | |
| "logps/rejected": -142.47341918945312, | |
| "loss": 125051.95, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.024104077368974686, | |
| "rewards/margins": 0.020355774089694023, | |
| "rewards/rejected": -0.04445984959602356, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.32064128256513025, | |
| "grad_norm": 6903511.395968328, | |
| "learning_rate": 3.775055679287305e-07, | |
| "logits/chosen": -2.699876308441162, | |
| "logits/rejected": -2.663015127182007, | |
| "logps/chosen": -129.7513427734375, | |
| "logps/rejected": -159.34945678710938, | |
| "loss": 123485.125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.029758721590042114, | |
| "rewards/margins": 0.023346439003944397, | |
| "rewards/rejected": -0.05310516431927681, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3246492985971944, | |
| "grad_norm": 7400217.269596528, | |
| "learning_rate": 3.752783964365256e-07, | |
| "logits/chosen": -2.5823917388916016, | |
| "logits/rejected": -2.597344398498535, | |
| "logps/chosen": -125.24183654785156, | |
| "logps/rejected": -148.7626953125, | |
| "loss": 124403.4625, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.0231742262840271, | |
| "rewards/margins": 0.02486516162753105, | |
| "rewards/rejected": -0.04803938418626785, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3286573146292585, | |
| "grad_norm": 10022891.443810735, | |
| "learning_rate": 3.730512249443207e-07, | |
| "logits/chosen": -2.5580251216888428, | |
| "logits/rejected": -2.556856393814087, | |
| "logps/chosen": -133.8833465576172, | |
| "logps/rejected": -167.32559204101562, | |
| "loss": 128231.225, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.038261737674474716, | |
| "rewards/margins": 0.021149639040231705, | |
| "rewards/rejected": -0.05941138416528702, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.33266533066132264, | |
| "grad_norm": 7872004.700402355, | |
| "learning_rate": 3.708240534521158e-07, | |
| "logits/chosen": -2.505337953567505, | |
| "logits/rejected": -2.528937816619873, | |
| "logps/chosen": -132.82406616210938, | |
| "logps/rejected": -155.55078125, | |
| "loss": 127033.425, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.02652103267610073, | |
| "rewards/margins": 0.02101508341729641, | |
| "rewards/rejected": -0.04753611236810684, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3366733466933868, | |
| "grad_norm": 7096774.331416015, | |
| "learning_rate": 3.685968819599109e-07, | |
| "logits/chosen": -2.5368692874908447, | |
| "logits/rejected": -2.535719156265259, | |
| "logps/chosen": -108.67805480957031, | |
| "logps/rejected": -144.25912475585938, | |
| "loss": 124284.075, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02542160078883171, | |
| "rewards/margins": 0.026436615735292435, | |
| "rewards/rejected": -0.051858216524124146, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3406813627254509, | |
| "grad_norm": 10050853.271825453, | |
| "learning_rate": 3.66369710467706e-07, | |
| "logits/chosen": -2.5603981018066406, | |
| "logits/rejected": -2.5634753704071045, | |
| "logps/chosen": -129.12669372558594, | |
| "logps/rejected": -153.24151611328125, | |
| "loss": 131637.4125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.031090397387742996, | |
| "rewards/margins": 0.013068552128970623, | |
| "rewards/rejected": -0.044158950448036194, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.34468937875751504, | |
| "grad_norm": 8788645.52475432, | |
| "learning_rate": 3.6414253897550114e-07, | |
| "logits/chosen": -2.5309653282165527, | |
| "logits/rejected": -2.4787347316741943, | |
| "logps/chosen": -102.19111633300781, | |
| "logps/rejected": -118.55183410644531, | |
| "loss": 125632.975, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.024080926552414894, | |
| "rewards/margins": 0.006669840309768915, | |
| "rewards/rejected": -0.030750762671232224, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3486973947895792, | |
| "grad_norm": 8574869.451608999, | |
| "learning_rate": 3.619153674832962e-07, | |
| "logits/chosen": -2.6231815814971924, | |
| "logits/rejected": -2.587998390197754, | |
| "logps/chosen": -105.8035659790039, | |
| "logps/rejected": -130.13351440429688, | |
| "loss": 124524.9125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.020694701001048088, | |
| "rewards/margins": 0.015317901968955994, | |
| "rewards/rejected": -0.03601260110735893, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3527054108216433, | |
| "grad_norm": 6665344.596513341, | |
| "learning_rate": 3.596881959910913e-07, | |
| "logits/chosen": -2.6597867012023926, | |
| "logits/rejected": -2.6807284355163574, | |
| "logps/chosen": -127.19599914550781, | |
| "logps/rejected": -153.52798461914062, | |
| "loss": 127034.3875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.025522371754050255, | |
| "rewards/margins": 0.014715611934661865, | |
| "rewards/rejected": -0.04023798182606697, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.35671342685370744, | |
| "grad_norm": 5233225.260742817, | |
| "learning_rate": 3.574610244988864e-07, | |
| "logits/chosen": -2.5910658836364746, | |
| "logits/rejected": -2.5633342266082764, | |
| "logps/chosen": -146.96466064453125, | |
| "logps/rejected": -160.8887481689453, | |
| "loss": 129368.775, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.027096951380372047, | |
| "rewards/margins": 0.007173667661845684, | |
| "rewards/rejected": -0.03427061811089516, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.36072144288577157, | |
| "grad_norm": 7630203.262076153, | |
| "learning_rate": 3.5523385300668154e-07, | |
| "logits/chosen": -2.676250457763672, | |
| "logits/rejected": -2.657402753829956, | |
| "logps/chosen": -101.4141845703125, | |
| "logps/rejected": -121.3133544921875, | |
| "loss": 118632.3875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.022349627688527107, | |
| "rewards/margins": 0.016511743888258934, | |
| "rewards/rejected": -0.03886137530207634, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.36472945891783565, | |
| "grad_norm": 6160335.089101694, | |
| "learning_rate": 3.530066815144766e-07, | |
| "logits/chosen": -2.663553237915039, | |
| "logits/rejected": -2.6835107803344727, | |
| "logps/chosen": -117.61933898925781, | |
| "logps/rejected": -127.96868896484375, | |
| "loss": 127476.4875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.028628546744585037, | |
| "rewards/margins": 0.009166366420686245, | |
| "rewards/rejected": -0.03779491409659386, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3687374749498998, | |
| "grad_norm": 5857190.0709482, | |
| "learning_rate": 3.5077951002227166e-07, | |
| "logits/chosen": -2.57889986038208, | |
| "logits/rejected": -2.5917673110961914, | |
| "logps/chosen": -99.30012512207031, | |
| "logps/rejected": -117.08067321777344, | |
| "loss": 126853.9125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.023543711751699448, | |
| "rewards/margins": 0.010370884090662003, | |
| "rewards/rejected": -0.03391459211707115, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.3727454909819639, | |
| "grad_norm": 11769408.84668034, | |
| "learning_rate": 3.485523385300668e-07, | |
| "logits/chosen": -2.5779290199279785, | |
| "logits/rejected": -2.543435573577881, | |
| "logps/chosen": -124.24088287353516, | |
| "logps/rejected": -146.30453491210938, | |
| "loss": 125690.2375, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.027858158573508263, | |
| "rewards/margins": 0.017768610268831253, | |
| "rewards/rejected": -0.045626770704984665, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.37675350701402804, | |
| "grad_norm": 6339046.89248737, | |
| "learning_rate": 3.463251670378619e-07, | |
| "logits/chosen": -2.545441150665283, | |
| "logits/rejected": -2.5451228618621826, | |
| "logps/chosen": -119.28828430175781, | |
| "logps/rejected": -136.03775024414062, | |
| "loss": 121444.1875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.023958856239914894, | |
| "rewards/margins": 0.016655322164297104, | |
| "rewards/rejected": -0.04061417654156685, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3807615230460922, | |
| "grad_norm": 7905647.567752224, | |
| "learning_rate": 3.44097995545657e-07, | |
| "logits/chosen": -2.489795446395874, | |
| "logits/rejected": -2.474541664123535, | |
| "logps/chosen": -105.68360900878906, | |
| "logps/rejected": -144.86727905273438, | |
| "loss": 123972.2625, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02857508696615696, | |
| "rewards/margins": 0.035942137241363525, | |
| "rewards/rejected": -0.06451722234487534, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3847695390781563, | |
| "grad_norm": 7517249.753109076, | |
| "learning_rate": 3.4187082405345207e-07, | |
| "logits/chosen": -2.5300402641296387, | |
| "logits/rejected": -2.551455020904541, | |
| "logps/chosen": -144.90447998046875, | |
| "logps/rejected": -181.8575897216797, | |
| "loss": 129581.9875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.032712481915950775, | |
| "rewards/margins": 0.023180881515145302, | |
| "rewards/rejected": -0.055893369019031525, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.38877755511022044, | |
| "grad_norm": 21884930.38904731, | |
| "learning_rate": 3.396436525612472e-07, | |
| "logits/chosen": -2.5614724159240723, | |
| "logits/rejected": -2.5662589073181152, | |
| "logps/chosen": -138.80459594726562, | |
| "logps/rejected": -176.45445251464844, | |
| "loss": 129077.6875, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03729068487882614, | |
| "rewards/margins": 0.023995213210582733, | |
| "rewards/rejected": -0.06128590181469917, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3927855711422846, | |
| "grad_norm": 6397072.682301449, | |
| "learning_rate": 3.374164810690423e-07, | |
| "logits/chosen": -2.483768939971924, | |
| "logits/rejected": -2.497523069381714, | |
| "logps/chosen": -107.89897155761719, | |
| "logps/rejected": -130.04559326171875, | |
| "loss": 126968.175, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.025696447119116783, | |
| "rewards/margins": 0.019706759601831436, | |
| "rewards/rejected": -0.04540320485830307, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3967935871743487, | |
| "grad_norm": 6076169.314948489, | |
| "learning_rate": 3.351893095768374e-07, | |
| "logits/chosen": -2.5927734375, | |
| "logits/rejected": -2.589218854904175, | |
| "logps/chosen": -125.49295806884766, | |
| "logps/rejected": -146.27127075195312, | |
| "loss": 130093.2, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.029010172933340073, | |
| "rewards/margins": 0.014808593317866325, | |
| "rewards/rejected": -0.04381876438856125, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.40080160320641284, | |
| "grad_norm": 8944436.503605708, | |
| "learning_rate": 3.329621380846325e-07, | |
| "logits/chosen": -2.4737820625305176, | |
| "logits/rejected": -2.4782590866088867, | |
| "logps/chosen": -115.3978500366211, | |
| "logps/rejected": -139.7859344482422, | |
| "loss": 125983.45, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03420211002230644, | |
| "rewards/margins": 0.018732238560914993, | |
| "rewards/rejected": -0.052934348583221436, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.40480961923847697, | |
| "grad_norm": 6834249.220142525, | |
| "learning_rate": 3.307349665924276e-07, | |
| "logits/chosen": -2.4906249046325684, | |
| "logits/rejected": -2.478178024291992, | |
| "logps/chosen": -130.49822998046875, | |
| "logps/rejected": -145.3443145751953, | |
| "loss": 125360.8375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02826479636132717, | |
| "rewards/margins": 0.01623808778822422, | |
| "rewards/rejected": -0.04450288414955139, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.4088176352705411, | |
| "grad_norm": 6383495.447630203, | |
| "learning_rate": 3.285077951002227e-07, | |
| "logits/chosen": -2.5950496196746826, | |
| "logits/rejected": -2.5853590965270996, | |
| "logps/chosen": -109.3490219116211, | |
| "logps/rejected": -135.37911987304688, | |
| "loss": 125451.1625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02749260701239109, | |
| "rewards/margins": 0.01786484755575657, | |
| "rewards/rejected": -0.04535745456814766, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.41282565130260523, | |
| "grad_norm": 6800961.848727982, | |
| "learning_rate": 3.262806236080178e-07, | |
| "logits/chosen": -2.527940273284912, | |
| "logits/rejected": -2.538398265838623, | |
| "logps/chosen": -116.2686538696289, | |
| "logps/rejected": -166.8658905029297, | |
| "loss": 121559.9375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.026556119322776794, | |
| "rewards/margins": 0.028276193886995316, | |
| "rewards/rejected": -0.05483230948448181, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.4168336673346693, | |
| "grad_norm": 6862016.969047391, | |
| "learning_rate": 3.2405345211581293e-07, | |
| "logits/chosen": -2.429194211959839, | |
| "logits/rejected": -2.442253589630127, | |
| "logps/chosen": -129.10696411132812, | |
| "logps/rejected": -149.61363220214844, | |
| "loss": 127840.825, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03121834062039852, | |
| "rewards/margins": 0.017258524894714355, | |
| "rewards/rejected": -0.048476867377758026, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.42084168336673344, | |
| "grad_norm": 7173695.345872832, | |
| "learning_rate": 3.21826280623608e-07, | |
| "logits/chosen": -2.4863028526306152, | |
| "logits/rejected": -2.498213291168213, | |
| "logps/chosen": -127.5420913696289, | |
| "logps/rejected": -162.05979919433594, | |
| "loss": 122006.5375, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.020269382745027542, | |
| "rewards/margins": 0.02875341847538948, | |
| "rewards/rejected": -0.04902280122041702, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.4248496993987976, | |
| "grad_norm": 6915379.934117418, | |
| "learning_rate": 3.195991091314031e-07, | |
| "logits/chosen": -2.5418026447296143, | |
| "logits/rejected": -2.5308048725128174, | |
| "logps/chosen": -115.8625717163086, | |
| "logps/rejected": -144.80714416503906, | |
| "loss": 121081.8, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.019701775163412094, | |
| "rewards/margins": 0.03037576749920845, | |
| "rewards/rejected": -0.05007754638791084, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4288577154308617, | |
| "grad_norm": 7460651.8397620395, | |
| "learning_rate": 3.1737193763919825e-07, | |
| "logits/chosen": -2.5827364921569824, | |
| "logits/rejected": -2.5804123878479004, | |
| "logps/chosen": -108.82574462890625, | |
| "logps/rejected": -138.85072326660156, | |
| "loss": 120443.35, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.019836189225316048, | |
| "rewards/margins": 0.029924744740128517, | |
| "rewards/rejected": -0.049760930240154266, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.43286573146292584, | |
| "grad_norm": 8373274.336152132, | |
| "learning_rate": 3.1514476614699334e-07, | |
| "logits/chosen": -2.516143321990967, | |
| "logits/rejected": -2.4642560482025146, | |
| "logps/chosen": -123.48294830322266, | |
| "logps/rejected": -164.36378479003906, | |
| "loss": 121728.3875, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03256348520517349, | |
| "rewards/margins": 0.03419329971075058, | |
| "rewards/rejected": -0.06675679236650467, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.43687374749499, | |
| "grad_norm": 8184337.333100434, | |
| "learning_rate": 3.129175946547884e-07, | |
| "logits/chosen": -2.560567855834961, | |
| "logits/rejected": -2.524467945098877, | |
| "logps/chosen": -121.753662109375, | |
| "logps/rejected": -148.0128173828125, | |
| "loss": 127275.2125, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.037222061306238174, | |
| "rewards/margins": 0.01743399165570736, | |
| "rewards/rejected": -0.054656051099300385, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.4408817635270541, | |
| "grad_norm": 6811268.460877864, | |
| "learning_rate": 3.1069042316258346e-07, | |
| "logits/chosen": -2.541689395904541, | |
| "logits/rejected": -2.5355188846588135, | |
| "logps/chosen": -118.40226745605469, | |
| "logps/rejected": -146.7948455810547, | |
| "loss": 129991.525, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.029409324750304222, | |
| "rewards/margins": 0.021463513374328613, | |
| "rewards/rejected": -0.050872839987277985, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.44488977955911824, | |
| "grad_norm": 6657005.1585574625, | |
| "learning_rate": 3.084632516703786e-07, | |
| "logits/chosen": -2.6857261657714844, | |
| "logits/rejected": -2.672269344329834, | |
| "logps/chosen": -101.30345153808594, | |
| "logps/rejected": -136.15231323242188, | |
| "loss": 122391.7, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.027355913072824478, | |
| "rewards/margins": 0.026283621788024902, | |
| "rewards/rejected": -0.05363954231142998, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.44889779559118237, | |
| "grad_norm": 7161516.5646296, | |
| "learning_rate": 3.062360801781737e-07, | |
| "logits/chosen": -2.492475748062134, | |
| "logits/rejected": -2.4887194633483887, | |
| "logps/chosen": -104.93692779541016, | |
| "logps/rejected": -122.68449401855469, | |
| "loss": 128814.6, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02373102679848671, | |
| "rewards/margins": 0.012768360786139965, | |
| "rewards/rejected": -0.03649938851594925, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4529058116232465, | |
| "grad_norm": 7008942.925795235, | |
| "learning_rate": 3.040089086859688e-07, | |
| "logits/chosen": -2.5698654651641846, | |
| "logits/rejected": -2.572453498840332, | |
| "logps/chosen": -116.10699462890625, | |
| "logps/rejected": -147.9447479248047, | |
| "loss": 122371.8, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03228304535150528, | |
| "rewards/margins": 0.02537880465388298, | |
| "rewards/rejected": -0.05766185000538826, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.45691382765531063, | |
| "grad_norm": 6146790.319547828, | |
| "learning_rate": 3.017817371937639e-07, | |
| "logits/chosen": -2.652864933013916, | |
| "logits/rejected": -2.6149935722351074, | |
| "logps/chosen": -122.29400634765625, | |
| "logps/rejected": -152.6931915283203, | |
| "loss": 119369.45, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.030378807336091995, | |
| "rewards/margins": 0.018168287351727486, | |
| "rewards/rejected": -0.04854709282517433, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.46092184368737477, | |
| "grad_norm": 10274811.215051277, | |
| "learning_rate": 2.99554565701559e-07, | |
| "logits/chosen": -2.521822690963745, | |
| "logits/rejected": -2.530947685241699, | |
| "logps/chosen": -130.49484252929688, | |
| "logps/rejected": -161.52488708496094, | |
| "loss": 126010.9375, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02795564755797386, | |
| "rewards/margins": 0.026814181357622147, | |
| "rewards/rejected": -0.05476983264088631, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4649298597194389, | |
| "grad_norm": 5529395.079244611, | |
| "learning_rate": 2.973273942093541e-07, | |
| "logits/chosen": -2.5171029567718506, | |
| "logits/rejected": -2.4763035774230957, | |
| "logps/chosen": -125.2338638305664, | |
| "logps/rejected": -178.67019653320312, | |
| "loss": 125064.3875, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02554917335510254, | |
| "rewards/margins": 0.03890024498105049, | |
| "rewards/rejected": -0.06444941461086273, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.46893787575150303, | |
| "grad_norm": 6908649.776157191, | |
| "learning_rate": 2.951002227171492e-07, | |
| "logits/chosen": -2.5064730644226074, | |
| "logits/rejected": -2.4823849201202393, | |
| "logps/chosen": -138.76272583007812, | |
| "logps/rejected": -171.69248962402344, | |
| "loss": 121075.625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03167058899998665, | |
| "rewards/margins": 0.03182462602853775, | |
| "rewards/rejected": -0.0634952187538147, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4729458917835671, | |
| "grad_norm": 7591675.76560546, | |
| "learning_rate": 2.928730512249443e-07, | |
| "logits/chosen": -2.518094301223755, | |
| "logits/rejected": -2.5124518871307373, | |
| "logps/chosen": -106.98667907714844, | |
| "logps/rejected": -129.47740173339844, | |
| "loss": 124332.4125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.028386671096086502, | |
| "rewards/margins": 0.013656134717166424, | |
| "rewards/rejected": -0.0420428030192852, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.47695390781563124, | |
| "grad_norm": 5281912.838243102, | |
| "learning_rate": 2.906458797327394e-07, | |
| "logits/chosen": -2.453968048095703, | |
| "logits/rejected": -2.478548526763916, | |
| "logps/chosen": -109.14216613769531, | |
| "logps/rejected": -177.39352416992188, | |
| "loss": 117646.8375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.026093561202287674, | |
| "rewards/margins": 0.055483561009168625, | |
| "rewards/rejected": -0.0815771296620369, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.48096192384769537, | |
| "grad_norm": 6902598.262857252, | |
| "learning_rate": 2.884187082405345e-07, | |
| "logits/chosen": -2.65455961227417, | |
| "logits/rejected": -2.6624550819396973, | |
| "logps/chosen": -114.522216796875, | |
| "logps/rejected": -130.91445922851562, | |
| "loss": 120296.1625, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.0260526891797781, | |
| "rewards/margins": 0.011702237650752068, | |
| "rewards/rejected": -0.03775492683053017, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.4849699398797595, | |
| "grad_norm": 9529297.592613008, | |
| "learning_rate": 2.8619153674832964e-07, | |
| "logits/chosen": -2.431119918823242, | |
| "logits/rejected": -2.4216580390930176, | |
| "logps/chosen": -133.5395965576172, | |
| "logps/rejected": -160.01345825195312, | |
| "loss": 123218.5875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.03321906179189682, | |
| "rewards/margins": 0.023233687505126, | |
| "rewards/rejected": -0.05645275115966797, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.48897795591182364, | |
| "grad_norm": 7763735.694070514, | |
| "learning_rate": 2.839643652561247e-07, | |
| "logits/chosen": -2.512303590774536, | |
| "logits/rejected": -2.511324644088745, | |
| "logps/chosen": -126.4686050415039, | |
| "logps/rejected": -170.10006713867188, | |
| "loss": 119174.075, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.028899723663926125, | |
| "rewards/margins": 0.0382329560816288, | |
| "rewards/rejected": -0.06713266670703888, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.49298597194388777, | |
| "grad_norm": 9045824.328324866, | |
| "learning_rate": 2.817371937639198e-07, | |
| "logits/chosen": -2.5191609859466553, | |
| "logits/rejected": -2.52032732963562, | |
| "logps/chosen": -134.53079223632812, | |
| "logps/rejected": -167.50125122070312, | |
| "loss": 126851.1375, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.03347449749708176, | |
| "rewards/margins": 0.019832942634820938, | |
| "rewards/rejected": -0.053307436406612396, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.4969939879759519, | |
| "grad_norm": 10448175.950927077, | |
| "learning_rate": 2.795100222717149e-07, | |
| "logits/chosen": -2.5834548473358154, | |
| "logits/rejected": -2.5978755950927734, | |
| "logps/chosen": -118.474365234375, | |
| "logps/rejected": -161.09974670410156, | |
| "loss": 123254.1375, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.029140984639525414, | |
| "rewards/margins": 0.03318404406309128, | |
| "rewards/rejected": -0.062325023114681244, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.501002004008016, | |
| "grad_norm": 8500418.118135955, | |
| "learning_rate": 2.7728285077951004e-07, | |
| "logits/chosen": -2.5313282012939453, | |
| "logits/rejected": -2.5623884201049805, | |
| "logps/chosen": -125.45368957519531, | |
| "logps/rejected": -148.5553741455078, | |
| "loss": 125882.25, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.028084218502044678, | |
| "rewards/margins": 0.027083024382591248, | |
| "rewards/rejected": -0.05516723915934563, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.5050100200400801, | |
| "grad_norm": 10978867.823274264, | |
| "learning_rate": 2.7505567928730513e-07, | |
| "logits/chosen": -2.6042990684509277, | |
| "logits/rejected": -2.60687255859375, | |
| "logps/chosen": -121.44742584228516, | |
| "logps/rejected": -154.75619506835938, | |
| "loss": 121126.55, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02883894369006157, | |
| "rewards/margins": 0.022157009690999985, | |
| "rewards/rejected": -0.05099595710635185, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.5090180360721442, | |
| "grad_norm": 6172018.078167409, | |
| "learning_rate": 2.728285077951002e-07, | |
| "logits/chosen": -2.5717759132385254, | |
| "logits/rejected": -2.5282435417175293, | |
| "logps/chosen": -109.5957260131836, | |
| "logps/rejected": -140.98318481445312, | |
| "loss": 120695.4875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.022453511133790016, | |
| "rewards/margins": 0.03208887577056885, | |
| "rewards/rejected": -0.054542385041713715, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.5130260521042084, | |
| "grad_norm": 5911426.409929097, | |
| "learning_rate": 2.7060133630289536e-07, | |
| "logits/chosen": -2.6769793033599854, | |
| "logits/rejected": -2.690333366394043, | |
| "logps/chosen": -110.48323059082031, | |
| "logps/rejected": -141.57073974609375, | |
| "loss": 129327.425, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025987869128584862, | |
| "rewards/margins": 0.01646535098552704, | |
| "rewards/rejected": -0.04245322197675705, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.5170340681362725, | |
| "grad_norm": 6979377.80404185, | |
| "learning_rate": 2.683741648106904e-07, | |
| "logits/chosen": -2.5594074726104736, | |
| "logits/rejected": -2.513986110687256, | |
| "logps/chosen": -119.63285827636719, | |
| "logps/rejected": -139.09017944335938, | |
| "loss": 127555.7375, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.024876989424228668, | |
| "rewards/margins": 0.013020751997828484, | |
| "rewards/rejected": -0.0378977432847023, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.5210420841683366, | |
| "grad_norm": 5292590.790815719, | |
| "learning_rate": 2.661469933184855e-07, | |
| "logits/chosen": -2.586153030395508, | |
| "logits/rejected": -2.620682716369629, | |
| "logps/chosen": -110.29219055175781, | |
| "logps/rejected": -155.91311645507812, | |
| "loss": 123184.1125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.022849615663290024, | |
| "rewards/margins": 0.03136241436004639, | |
| "rewards/rejected": -0.05421202629804611, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.5250501002004008, | |
| "grad_norm": 6798885.42996808, | |
| "learning_rate": 2.6391982182628057e-07, | |
| "logits/chosen": -2.536839246749878, | |
| "logits/rejected": -2.488548517227173, | |
| "logps/chosen": -114.6366195678711, | |
| "logps/rejected": -137.06741333007812, | |
| "loss": 123873.05, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.028652016073465347, | |
| "rewards/margins": 0.020913179963827133, | |
| "rewards/rejected": -0.04956519976258278, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5290581162324649, | |
| "grad_norm": 6144525.590699139, | |
| "learning_rate": 2.616926503340757e-07, | |
| "logits/chosen": -2.530562162399292, | |
| "logits/rejected": -2.5206761360168457, | |
| "logps/chosen": -130.77320861816406, | |
| "logps/rejected": -152.3673858642578, | |
| "loss": 123906.025, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.027242273092269897, | |
| "rewards/margins": 0.016565924510359764, | |
| "rewards/rejected": -0.04380819946527481, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.533066132264529, | |
| "grad_norm": 8324367.487045379, | |
| "learning_rate": 2.594654788418708e-07, | |
| "logits/chosen": -2.4228427410125732, | |
| "logits/rejected": -2.416536808013916, | |
| "logps/chosen": -113.18867492675781, | |
| "logps/rejected": -136.65492248535156, | |
| "loss": 127855.825, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.028934326022863388, | |
| "rewards/margins": 0.015154870226979256, | |
| "rewards/rejected": -0.04408919811248779, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.5370741482965932, | |
| "grad_norm": 7499303.936485078, | |
| "learning_rate": 2.572383073496659e-07, | |
| "logits/chosen": -2.514617443084717, | |
| "logits/rejected": -2.522400140762329, | |
| "logps/chosen": -133.8329315185547, | |
| "logps/rejected": -168.91912841796875, | |
| "loss": 123154.5875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03134072571992874, | |
| "rewards/margins": 0.031129617244005203, | |
| "rewards/rejected": -0.06247033551335335, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5410821643286573, | |
| "grad_norm": 7001866.440455517, | |
| "learning_rate": 2.5501113585746103e-07, | |
| "logits/chosen": -2.424100160598755, | |
| "logits/rejected": -2.391080856323242, | |
| "logps/chosen": -129.46676635742188, | |
| "logps/rejected": -170.42715454101562, | |
| "loss": 121434.3125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02975108101963997, | |
| "rewards/margins": 0.0271878931671381, | |
| "rewards/rejected": -0.05693897604942322, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5450901803607214, | |
| "grad_norm": 8973879.608996509, | |
| "learning_rate": 2.527839643652561e-07, | |
| "logits/chosen": -2.455371618270874, | |
| "logits/rejected": -2.448552131652832, | |
| "logps/chosen": -107.38471984863281, | |
| "logps/rejected": -153.23446655273438, | |
| "loss": 122286.9875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.02663249336183071, | |
| "rewards/margins": 0.0314641147851944, | |
| "rewards/rejected": -0.05809660628437996, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5490981963927856, | |
| "grad_norm": 6355878.402179637, | |
| "learning_rate": 2.505567928730512e-07, | |
| "logits/chosen": -2.506803035736084, | |
| "logits/rejected": -2.485286235809326, | |
| "logps/chosen": -105.56050109863281, | |
| "logps/rejected": -143.86761474609375, | |
| "loss": 123893.0875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.026928072795271873, | |
| "rewards/margins": 0.030396688729524612, | |
| "rewards/rejected": -0.05732475593686104, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5531062124248497, | |
| "grad_norm": 6945767.3333855895, | |
| "learning_rate": 2.483296213808463e-07, | |
| "logits/chosen": -2.563617467880249, | |
| "logits/rejected": -2.5720462799072266, | |
| "logps/chosen": -120.37355041503906, | |
| "logps/rejected": -134.89710998535156, | |
| "loss": 126817.7625, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.029632825404405594, | |
| "rewards/margins": 0.01115390844643116, | |
| "rewards/rejected": -0.040786728262901306, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5571142284569138, | |
| "grad_norm": 6523846.353792737, | |
| "learning_rate": 2.4610244988864143e-07, | |
| "logits/chosen": -2.3972599506378174, | |
| "logits/rejected": -2.3907814025878906, | |
| "logps/chosen": -137.69276428222656, | |
| "logps/rejected": -148.29226684570312, | |
| "loss": 121494.9875, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0320330411195755, | |
| "rewards/margins": 0.012286066077649593, | |
| "rewards/rejected": -0.04431910812854767, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.561122244488978, | |
| "grad_norm": 7675269.679056767, | |
| "learning_rate": 2.438752783964365e-07, | |
| "logits/chosen": -2.477886199951172, | |
| "logits/rejected": -2.5009713172912598, | |
| "logps/chosen": -127.43409729003906, | |
| "logps/rejected": -148.87342834472656, | |
| "loss": 128807.55, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.025569623336195946, | |
| "rewards/margins": 0.016508014872670174, | |
| "rewards/rejected": -0.04207763820886612, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5651302605210421, | |
| "grad_norm": 7431700.440385598, | |
| "learning_rate": 2.416481069042316e-07, | |
| "logits/chosen": -2.475954532623291, | |
| "logits/rejected": -2.468047857284546, | |
| "logps/chosen": -125.47874450683594, | |
| "logps/rejected": -153.934326171875, | |
| "loss": 128646.025, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.027879610657691956, | |
| "rewards/margins": 0.025334885343909264, | |
| "rewards/rejected": -0.05321450158953667, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5691382765531062, | |
| "grad_norm": 6899812.52820539, | |
| "learning_rate": 2.394209354120267e-07, | |
| "logits/chosen": -2.522841691970825, | |
| "logits/rejected": -2.486010789871216, | |
| "logps/chosen": -122.23579406738281, | |
| "logps/rejected": -163.36984252929688, | |
| "loss": 122211.775, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02656758762896061, | |
| "rewards/margins": 0.03301847726106644, | |
| "rewards/rejected": -0.059586066752672195, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5731462925851704, | |
| "grad_norm": 6173557.341510408, | |
| "learning_rate": 2.371937639198218e-07, | |
| "logits/chosen": -2.423492193222046, | |
| "logits/rejected": -2.4019968509674072, | |
| "logps/chosen": -130.19174194335938, | |
| "logps/rejected": -160.64743041992188, | |
| "loss": 120977.9625, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.031222287565469742, | |
| "rewards/margins": 0.02653447352349758, | |
| "rewards/rejected": -0.057756759226322174, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5771543086172345, | |
| "grad_norm": 7643405.223271913, | |
| "learning_rate": 2.349665924276169e-07, | |
| "logits/chosen": -2.5014212131500244, | |
| "logits/rejected": -2.527346134185791, | |
| "logps/chosen": -119.39387512207031, | |
| "logps/rejected": -138.7740478515625, | |
| "loss": 124317.4, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02439231611788273, | |
| "rewards/margins": 0.021347712725400925, | |
| "rewards/rejected": -0.0457400307059288, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5811623246492986, | |
| "grad_norm": 6692302.873722134, | |
| "learning_rate": 2.32739420935412e-07, | |
| "logits/chosen": -2.5462992191314697, | |
| "logits/rejected": -2.5556600093841553, | |
| "logps/chosen": -124.85557556152344, | |
| "logps/rejected": -163.96566772460938, | |
| "loss": 123862.875, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.024991046637296677, | |
| "rewards/margins": 0.02179926075041294, | |
| "rewards/rejected": -0.04679030552506447, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5851703406813628, | |
| "grad_norm": 7345120.15663842, | |
| "learning_rate": 2.3051224944320713e-07, | |
| "logits/chosen": -2.5199873447418213, | |
| "logits/rejected": -2.4958107471466064, | |
| "logps/chosen": -126.18660736083984, | |
| "logps/rejected": -143.85592651367188, | |
| "loss": 126067.1625, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02991018258035183, | |
| "rewards/margins": 0.019812356680631638, | |
| "rewards/rejected": -0.049722544848918915, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5891783567134269, | |
| "grad_norm": 6853174.117141145, | |
| "learning_rate": 2.2828507795100222e-07, | |
| "logits/chosen": -2.4920763969421387, | |
| "logits/rejected": -2.4885401725769043, | |
| "logps/chosen": -121.91841125488281, | |
| "logps/rejected": -166.50521850585938, | |
| "loss": 126326.975, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.030897056683897972, | |
| "rewards/margins": 0.032330263406038284, | |
| "rewards/rejected": -0.06322731822729111, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.593186372745491, | |
| "grad_norm": 7779562.538080393, | |
| "learning_rate": 2.2605790645879733e-07, | |
| "logits/chosen": -2.382236957550049, | |
| "logits/rejected": -2.3837084770202637, | |
| "logps/chosen": -142.3158721923828, | |
| "logps/rejected": -155.12033081054688, | |
| "loss": 130777.2875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.03417587652802467, | |
| "rewards/margins": 0.012108733877539635, | |
| "rewards/rejected": -0.04628460854291916, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5971943887775552, | |
| "grad_norm": 6279182.617938158, | |
| "learning_rate": 2.2383073496659242e-07, | |
| "logits/chosen": -2.432779312133789, | |
| "logits/rejected": -2.4290943145751953, | |
| "logps/chosen": -128.83761596679688, | |
| "logps/rejected": -149.1236114501953, | |
| "loss": 124363.65, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.032311566174030304, | |
| "rewards/margins": 0.015544983558356762, | |
| "rewards/rejected": -0.04785655066370964, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.6012024048096193, | |
| "grad_norm": 8182544.255090159, | |
| "learning_rate": 2.2160356347438753e-07, | |
| "logits/chosen": -2.491617441177368, | |
| "logits/rejected": -2.482922077178955, | |
| "logps/chosen": -134.8472137451172, | |
| "logps/rejected": -160.60122680664062, | |
| "loss": 128824.125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03733091801404953, | |
| "rewards/margins": 0.017995206639170647, | |
| "rewards/rejected": -0.055326126515865326, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.6052104208416834, | |
| "grad_norm": 7016110.496399047, | |
| "learning_rate": 2.1937639198218262e-07, | |
| "logits/chosen": -2.466414451599121, | |
| "logits/rejected": -2.479168176651001, | |
| "logps/chosen": -133.4922332763672, | |
| "logps/rejected": -155.50863647460938, | |
| "loss": 123948.275, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.0342363640666008, | |
| "rewards/margins": 0.013794171623885632, | |
| "rewards/rejected": -0.04803053289651871, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.6092184368737475, | |
| "grad_norm": 6302317.5145249935, | |
| "learning_rate": 2.171492204899777e-07, | |
| "logits/chosen": -2.4898009300231934, | |
| "logits/rejected": -2.528862237930298, | |
| "logps/chosen": -122.98271179199219, | |
| "logps/rejected": -146.36402893066406, | |
| "loss": 123657.8875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02533816173672676, | |
| "rewards/margins": 0.019133783876895905, | |
| "rewards/rejected": -0.044471945613622665, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.6132264529058116, | |
| "grad_norm": 6357254.947381178, | |
| "learning_rate": 2.1492204899777282e-07, | |
| "logits/chosen": -2.3794853687286377, | |
| "logits/rejected": -2.387608528137207, | |
| "logps/chosen": -117.59773254394531, | |
| "logps/rejected": -149.55462646484375, | |
| "loss": 121729.5375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.027486393228173256, | |
| "rewards/margins": 0.026237377896904945, | |
| "rewards/rejected": -0.053723763674497604, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.6172344689378757, | |
| "grad_norm": 5337503.057957096, | |
| "learning_rate": 2.126948775055679e-07, | |
| "logits/chosen": -2.4973270893096924, | |
| "logits/rejected": -2.479884386062622, | |
| "logps/chosen": -108.48951721191406, | |
| "logps/rejected": -132.74342346191406, | |
| "loss": 128555.15, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.028234243392944336, | |
| "rewards/margins": 0.017721932381391525, | |
| "rewards/rejected": -0.04595617204904556, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.6212424849699398, | |
| "grad_norm": 6436275.76492097, | |
| "learning_rate": 2.1046770601336302e-07, | |
| "logits/chosen": -2.472238779067993, | |
| "logits/rejected": -2.4871106147766113, | |
| "logps/chosen": -138.1797637939453, | |
| "logps/rejected": -165.5472869873047, | |
| "loss": 128596.475, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02902204915881157, | |
| "rewards/margins": 0.022927356883883476, | |
| "rewards/rejected": -0.051949404180049896, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.625250501002004, | |
| "grad_norm": 7874140.319482003, | |
| "learning_rate": 2.082405345211581e-07, | |
| "logits/chosen": -2.4984288215637207, | |
| "logits/rejected": -2.5187220573425293, | |
| "logps/chosen": -112.08372497558594, | |
| "logps/rejected": -143.27493286132812, | |
| "loss": 123719.8125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.029770880937576294, | |
| "rewards/margins": 0.023414723575115204, | |
| "rewards/rejected": -0.0531856045126915, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.6292585170340681, | |
| "grad_norm": 7803857.792752564, | |
| "learning_rate": 2.0601336302895323e-07, | |
| "logits/chosen": -2.521970510482788, | |
| "logits/rejected": -2.5433990955352783, | |
| "logps/chosen": -156.52963256835938, | |
| "logps/rejected": -190.4028778076172, | |
| "loss": 120760.1375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.029155146330595016, | |
| "rewards/margins": 0.03910597413778305, | |
| "rewards/rejected": -0.06826111674308777, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.6332665330661322, | |
| "grad_norm": 5933142.237627983, | |
| "learning_rate": 2.0378619153674831e-07, | |
| "logits/chosen": -2.5751547813415527, | |
| "logits/rejected": -2.537017345428467, | |
| "logps/chosen": -112.1880111694336, | |
| "logps/rejected": -142.88565063476562, | |
| "loss": 121429.8375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.026801547035574913, | |
| "rewards/margins": 0.02268964797258377, | |
| "rewards/rejected": -0.049491189420223236, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.6372745490981964, | |
| "grad_norm": 6625290.260166941, | |
| "learning_rate": 2.0155902004454343e-07, | |
| "logits/chosen": -2.4771504402160645, | |
| "logits/rejected": -2.490891933441162, | |
| "logps/chosen": -114.3309326171875, | |
| "logps/rejected": -150.93031311035156, | |
| "loss": 120093.325, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.03233719617128372, | |
| "rewards/margins": 0.031473204493522644, | |
| "rewards/rejected": -0.06381039321422577, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6412825651302605, | |
| "grad_norm": 8931578.977132296, | |
| "learning_rate": 1.9933184855233854e-07, | |
| "logits/chosen": -2.410708427429199, | |
| "logits/rejected": -2.393162250518799, | |
| "logps/chosen": -128.2466583251953, | |
| "logps/rejected": -157.00393676757812, | |
| "loss": 129014.2, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.031892385333776474, | |
| "rewards/margins": 0.03138261288404465, | |
| "rewards/rejected": -0.06327499449253082, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6452905811623246, | |
| "grad_norm": 7621241.075310516, | |
| "learning_rate": 1.971046770601336e-07, | |
| "logits/chosen": -2.4275999069213867, | |
| "logits/rejected": -2.436274290084839, | |
| "logps/chosen": -135.43801879882812, | |
| "logps/rejected": -174.9510955810547, | |
| "loss": 121219.5125, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.025171224027872086, | |
| "rewards/margins": 0.024888776242733, | |
| "rewards/rejected": -0.050060003995895386, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6492985971943888, | |
| "grad_norm": 6822723.418880638, | |
| "learning_rate": 1.9487750556792872e-07, | |
| "logits/chosen": -2.488560199737549, | |
| "logits/rejected": -2.467193126678467, | |
| "logps/chosen": -116.24674987792969, | |
| "logps/rejected": -135.80844116210938, | |
| "loss": 122399.8625, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02936776913702488, | |
| "rewards/margins": 0.018525371327996254, | |
| "rewards/rejected": -0.04789314419031143, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6533066132264529, | |
| "grad_norm": 7659606.2078440925, | |
| "learning_rate": 1.926503340757238e-07, | |
| "logits/chosen": -2.4457767009735107, | |
| "logits/rejected": -2.443535327911377, | |
| "logps/chosen": -120.93404388427734, | |
| "logps/rejected": -173.1234130859375, | |
| "loss": 117188.175, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02749781683087349, | |
| "rewards/margins": 0.040742214769124985, | |
| "rewards/rejected": -0.06824003159999847, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.657314629258517, | |
| "grad_norm": 10173287.988396857, | |
| "learning_rate": 1.9042316258351892e-07, | |
| "logits/chosen": -2.5128328800201416, | |
| "logits/rejected": -2.513092041015625, | |
| "logps/chosen": -140.1425018310547, | |
| "logps/rejected": -157.23348999023438, | |
| "loss": 129493.4375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.03822711110115051, | |
| "rewards/margins": 0.012729940004646778, | |
| "rewards/rejected": -0.05095704644918442, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6613226452905812, | |
| "grad_norm": 6527946.473628513, | |
| "learning_rate": 1.88195991091314e-07, | |
| "logits/chosen": -2.5390524864196777, | |
| "logits/rejected": -2.547598361968994, | |
| "logps/chosen": -117.64924621582031, | |
| "logps/rejected": -163.60293579101562, | |
| "loss": 118917.6, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025879234075546265, | |
| "rewards/margins": 0.04024052247405052, | |
| "rewards/rejected": -0.06611974537372589, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6653306613226453, | |
| "grad_norm": 4891201.175696377, | |
| "learning_rate": 1.8596881959910912e-07, | |
| "logits/chosen": -2.3803093433380127, | |
| "logits/rejected": -2.349740743637085, | |
| "logps/chosen": -128.08169555664062, | |
| "logps/rejected": -154.24667358398438, | |
| "loss": 127419.9875, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.029361028224229813, | |
| "rewards/margins": 0.01856454275548458, | |
| "rewards/rejected": -0.047925569117069244, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6693386773547094, | |
| "grad_norm": 6543519.27422337, | |
| "learning_rate": 1.8374164810690424e-07, | |
| "logits/chosen": -2.527883529663086, | |
| "logits/rejected": -2.515263319015503, | |
| "logps/chosen": -118.89616394042969, | |
| "logps/rejected": -147.4104461669922, | |
| "loss": 118822.1375, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.01884353719651699, | |
| "rewards/margins": 0.0279966089874506, | |
| "rewards/rejected": -0.04684014618396759, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6733466933867736, | |
| "grad_norm": 7806417.669748601, | |
| "learning_rate": 1.8151447661469933e-07, | |
| "logits/chosen": -2.4772043228149414, | |
| "logits/rejected": -2.515587329864502, | |
| "logps/chosen": -127.4335708618164, | |
| "logps/rejected": -167.4239044189453, | |
| "loss": 124433.9625, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.029232731088995934, | |
| "rewards/margins": 0.029919158667325974, | |
| "rewards/rejected": -0.059151895344257355, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6773547094188377, | |
| "grad_norm": 7678118.878557649, | |
| "learning_rate": 1.7928730512249444e-07, | |
| "logits/chosen": -2.3632655143737793, | |
| "logits/rejected": -2.3506579399108887, | |
| "logps/chosen": -131.25975036621094, | |
| "logps/rejected": -157.8623046875, | |
| "loss": 122209.875, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.031646568328142166, | |
| "rewards/margins": 0.020655754953622818, | |
| "rewards/rejected": -0.052302323281764984, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6813627254509018, | |
| "grad_norm": 8699839.812013036, | |
| "learning_rate": 1.770601336302895e-07, | |
| "logits/chosen": -2.3938724994659424, | |
| "logits/rejected": -2.3754312992095947, | |
| "logps/chosen": -115.58536529541016, | |
| "logps/rejected": -166.11984252929688, | |
| "loss": 116746.4, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.027456630021333694, | |
| "rewards/margins": 0.03820016235113144, | |
| "rewards/rejected": -0.06565678864717484, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.685370741482966, | |
| "grad_norm": 5919208.729707667, | |
| "learning_rate": 1.7483296213808462e-07, | |
| "logits/chosen": -2.4200167655944824, | |
| "logits/rejected": -2.427748680114746, | |
| "logps/chosen": -126.75040435791016, | |
| "logps/rejected": -168.77432250976562, | |
| "loss": 121633.425, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.029950793832540512, | |
| "rewards/margins": 0.03923628851771355, | |
| "rewards/rejected": -0.06918708235025406, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6893787575150301, | |
| "grad_norm": 8189290.652671266, | |
| "learning_rate": 1.726057906458797e-07, | |
| "logits/chosen": -2.436396598815918, | |
| "logits/rejected": -2.3985111713409424, | |
| "logps/chosen": -133.26527404785156, | |
| "logps/rejected": -165.04611206054688, | |
| "loss": 124233.6125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.033830661326646805, | |
| "rewards/margins": 0.033877044916152954, | |
| "rewards/rejected": -0.06770770251750946, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6933867735470942, | |
| "grad_norm": 6543680.531937181, | |
| "learning_rate": 1.7037861915367482e-07, | |
| "logits/chosen": -2.3416950702667236, | |
| "logits/rejected": -2.358785629272461, | |
| "logps/chosen": -121.58663177490234, | |
| "logps/rejected": -172.85147094726562, | |
| "loss": 121040.9, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.024335870519280434, | |
| "rewards/margins": 0.03769830986857414, | |
| "rewards/rejected": -0.06203417852520943, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6973947895791583, | |
| "grad_norm": 8413451.882571388, | |
| "learning_rate": 1.6815144766146993e-07, | |
| "logits/chosen": -2.5148143768310547, | |
| "logits/rejected": -2.5122790336608887, | |
| "logps/chosen": -131.95590209960938, | |
| "logps/rejected": -170.07913208007812, | |
| "loss": 122086.9875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02519279159605503, | |
| "rewards/margins": 0.03293418884277344, | |
| "rewards/rejected": -0.05812697485089302, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.7014028056112225, | |
| "grad_norm": 7990729.285338638, | |
| "learning_rate": 1.6592427616926502e-07, | |
| "logits/chosen": -2.424561023712158, | |
| "logits/rejected": -2.411344051361084, | |
| "logps/chosen": -112.82745361328125, | |
| "logps/rejected": -140.15585327148438, | |
| "loss": 122354.5875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.024268418550491333, | |
| "rewards/margins": 0.021955247968435287, | |
| "rewards/rejected": -0.04622367024421692, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.7054108216432866, | |
| "grad_norm": 8082374.5587068405, | |
| "learning_rate": 1.6369710467706014e-07, | |
| "logits/chosen": -2.3887767791748047, | |
| "logits/rejected": -2.399949312210083, | |
| "logps/chosen": -113.41932678222656, | |
| "logps/rejected": -150.90052795410156, | |
| "loss": 123895.825, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02654331922531128, | |
| "rewards/margins": 0.031898465007543564, | |
| "rewards/rejected": -0.058441780507564545, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.7094188376753507, | |
| "grad_norm": 10182582.529576585, | |
| "learning_rate": 1.6146993318485522e-07, | |
| "logits/chosen": -2.471140146255493, | |
| "logits/rejected": -2.464400291442871, | |
| "logps/chosen": -134.3041534423828, | |
| "logps/rejected": -168.62957763671875, | |
| "loss": 123414.175, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.028919730335474014, | |
| "rewards/margins": 0.032348960638046265, | |
| "rewards/rejected": -0.06126868724822998, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.7134268537074149, | |
| "grad_norm": 7012588.447398562, | |
| "learning_rate": 1.5924276169265034e-07, | |
| "logits/chosen": -2.4046647548675537, | |
| "logits/rejected": -2.436089277267456, | |
| "logps/chosen": -140.215087890625, | |
| "logps/rejected": -177.3277587890625, | |
| "loss": 116220.825, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.030313868075609207, | |
| "rewards/margins": 0.03892980143427849, | |
| "rewards/rejected": -0.0692436695098877, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.717434869739479, | |
| "grad_norm": 5846594.429433788, | |
| "learning_rate": 1.5701559020044543e-07, | |
| "logits/chosen": -2.5337796211242676, | |
| "logits/rejected": -2.4943432807922363, | |
| "logps/chosen": -124.1761474609375, | |
| "logps/rejected": -172.1324920654297, | |
| "loss": 123137.75, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03101753070950508, | |
| "rewards/margins": 0.0418589822947979, | |
| "rewards/rejected": -0.07287651300430298, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.7214428857715431, | |
| "grad_norm": 7028764.493993101, | |
| "learning_rate": 1.5478841870824051e-07, | |
| "logits/chosen": -2.426300525665283, | |
| "logits/rejected": -2.387045383453369, | |
| "logps/chosen": -115.95497131347656, | |
| "logps/rejected": -157.61618041992188, | |
| "loss": 124143.675, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.024013713002204895, | |
| "rewards/margins": 0.037388551980257034, | |
| "rewards/rejected": -0.06140226125717163, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.7254509018036072, | |
| "grad_norm": 8234396.37962489, | |
| "learning_rate": 1.5256124721603563e-07, | |
| "logits/chosen": -2.3112475872039795, | |
| "logits/rejected": -2.310009479522705, | |
| "logps/chosen": -114.36012268066406, | |
| "logps/rejected": -153.5015411376953, | |
| "loss": 118239.1875, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.018304970115423203, | |
| "rewards/margins": 0.04038548097014427, | |
| "rewards/rejected": -0.058690451085567474, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.7294589178356713, | |
| "grad_norm": 8651551.474852078, | |
| "learning_rate": 1.5033407572383072e-07, | |
| "logits/chosen": -2.3074092864990234, | |
| "logits/rejected": -2.2610440254211426, | |
| "logps/chosen": -126.58909606933594, | |
| "logps/rejected": -180.2452392578125, | |
| "loss": 115241.575, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.027572233229875565, | |
| "rewards/margins": 0.04374260455369949, | |
| "rewards/rejected": -0.07131483405828476, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.7334669338677354, | |
| "grad_norm": 7273151.817120667, | |
| "learning_rate": 1.4810690423162583e-07, | |
| "logits/chosen": -2.2907938957214355, | |
| "logits/rejected": -2.241560459136963, | |
| "logps/chosen": -121.88932800292969, | |
| "logps/rejected": -165.8365020751953, | |
| "loss": 118192.675, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.030897384509444237, | |
| "rewards/margins": 0.042338501662015915, | |
| "rewards/rejected": -0.073235884308815, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.7374749498997996, | |
| "grad_norm": 8416170.338029677, | |
| "learning_rate": 1.4587973273942092e-07, | |
| "logits/chosen": -2.2940685749053955, | |
| "logits/rejected": -2.2601191997528076, | |
| "logps/chosen": -124.39451599121094, | |
| "logps/rejected": -161.70626831054688, | |
| "loss": 117542.4, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.029775938019156456, | |
| "rewards/margins": 0.03537831827998161, | |
| "rewards/rejected": -0.06515425443649292, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7414829659318637, | |
| "grad_norm": 8984144.824878268, | |
| "learning_rate": 1.4365256124721603e-07, | |
| "logits/chosen": -2.4095590114593506, | |
| "logits/rejected": -2.382366418838501, | |
| "logps/chosen": -131.84091186523438, | |
| "logps/rejected": -171.8034210205078, | |
| "loss": 123931.3875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03394445404410362, | |
| "rewards/margins": 0.0342072993516922, | |
| "rewards/rejected": -0.06815175712108612, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7454909819639278, | |
| "grad_norm": 9132360.1760025, | |
| "learning_rate": 1.4142538975501115e-07, | |
| "logits/chosen": -2.3694911003112793, | |
| "logits/rejected": -2.3512394428253174, | |
| "logps/chosen": -123.51580810546875, | |
| "logps/rejected": -167.21522521972656, | |
| "loss": 127071.8625, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.031071608886122704, | |
| "rewards/margins": 0.03928913176059723, | |
| "rewards/rejected": -0.07036073505878448, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.749498997995992, | |
| "grad_norm": 7139680.939654487, | |
| "learning_rate": 1.3919821826280624e-07, | |
| "logits/chosen": -2.462883472442627, | |
| "logits/rejected": -2.4619345664978027, | |
| "logps/chosen": -109.59526062011719, | |
| "logps/rejected": -158.25265502929688, | |
| "loss": 119525.8125, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.02276531606912613, | |
| "rewards/margins": 0.03841192647814751, | |
| "rewards/rejected": -0.06117723509669304, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.7535070140280561, | |
| "grad_norm": 9748149.33911386, | |
| "learning_rate": 1.3697104677060135e-07, | |
| "logits/chosen": -2.395458459854126, | |
| "logits/rejected": -2.3749117851257324, | |
| "logps/chosen": -123.3841552734375, | |
| "logps/rejected": -154.816650390625, | |
| "loss": 122715.85, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.03459615260362625, | |
| "rewards/margins": 0.02642343007028103, | |
| "rewards/rejected": -0.06101958826184273, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7575150300601202, | |
| "grad_norm": 7062510.21247513, | |
| "learning_rate": 1.347438752783964e-07, | |
| "logits/chosen": -2.466752290725708, | |
| "logits/rejected": -2.4664688110351562, | |
| "logps/chosen": -117.7652587890625, | |
| "logps/rejected": -168.4532928466797, | |
| "loss": 122685.4375, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.026580441743135452, | |
| "rewards/margins": 0.039577435702085495, | |
| "rewards/rejected": -0.06615787744522095, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7615230460921844, | |
| "grad_norm": 9095349.65659939, | |
| "learning_rate": 1.3251670378619153e-07, | |
| "logits/chosen": -2.340463161468506, | |
| "logits/rejected": -2.343043088912964, | |
| "logps/chosen": -135.51889038085938, | |
| "logps/rejected": -194.67893981933594, | |
| "loss": 124649.7875, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.03404935449361801, | |
| "rewards/margins": 0.039114292711019516, | |
| "rewards/rejected": -0.07316364347934723, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7655310621242485, | |
| "grad_norm": 8798241.718444504, | |
| "learning_rate": 1.3028953229398661e-07, | |
| "logits/chosen": -2.417386054992676, | |
| "logits/rejected": -2.35686993598938, | |
| "logps/chosen": -126.17924499511719, | |
| "logps/rejected": -156.60858154296875, | |
| "loss": 126678.975, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.03539714962244034, | |
| "rewards/margins": 0.023838359862565994, | |
| "rewards/rejected": -0.05923551321029663, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7695390781563126, | |
| "grad_norm": 8685556.241747925, | |
| "learning_rate": 1.2806236080178173e-07, | |
| "logits/chosen": -2.2988665103912354, | |
| "logits/rejected": -2.2867467403411865, | |
| "logps/chosen": -102.80122375488281, | |
| "logps/rejected": -136.88668823242188, | |
| "loss": 124837.6625, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.02582702599465847, | |
| "rewards/margins": 0.02396995946764946, | |
| "rewards/rejected": -0.04979699105024338, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7735470941883767, | |
| "grad_norm": 7757034.4029932795, | |
| "learning_rate": 1.2583518930957684e-07, | |
| "logits/chosen": -2.3887832164764404, | |
| "logits/rejected": -2.388990640640259, | |
| "logps/chosen": -121.30000305175781, | |
| "logps/rejected": -180.59637451171875, | |
| "loss": 117815.775, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025620033964514732, | |
| "rewards/margins": 0.048595868051052094, | |
| "rewards/rejected": -0.07421590387821198, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7775551102204409, | |
| "grad_norm": 8307958.766272747, | |
| "learning_rate": 1.2360801781737193e-07, | |
| "logits/chosen": -2.38761568069458, | |
| "logits/rejected": -2.408409357070923, | |
| "logps/chosen": -121.0750732421875, | |
| "logps/rejected": -156.45553588867188, | |
| "loss": 127248.7375, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02739100717008114, | |
| "rewards/margins": 0.03653334453701973, | |
| "rewards/rejected": -0.06392434984445572, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.781563126252505, | |
| "grad_norm": 7961844.607771009, | |
| "learning_rate": 1.2138084632516702e-07, | |
| "logits/chosen": -2.2147622108459473, | |
| "logits/rejected": -2.216984272003174, | |
| "logps/chosen": -137.97161865234375, | |
| "logps/rejected": -177.88926696777344, | |
| "loss": 117088.8875, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.027384892106056213, | |
| "rewards/margins": 0.04585758596658707, | |
| "rewards/rejected": -0.07324248552322388, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7855711422845691, | |
| "grad_norm": 7970074.398916679, | |
| "learning_rate": 1.1915367483296213e-07, | |
| "logits/chosen": -2.431124210357666, | |
| "logits/rejected": -2.408353567123413, | |
| "logps/chosen": -131.7977752685547, | |
| "logps/rejected": -183.93661499023438, | |
| "loss": 118722.2375, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.0299037154763937, | |
| "rewards/margins": 0.045822691172361374, | |
| "rewards/rejected": -0.07572640478610992, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7895791583166333, | |
| "grad_norm": 8486402.500741018, | |
| "learning_rate": 1.1692650334075723e-07, | |
| "logits/chosen": -2.373565196990967, | |
| "logits/rejected": -2.3279855251312256, | |
| "logps/chosen": -127.22279357910156, | |
| "logps/rejected": -165.14312744140625, | |
| "loss": 125666.025, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.036864422261714935, | |
| "rewards/margins": 0.034248046576976776, | |
| "rewards/rejected": -0.07111246883869171, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7935871743486974, | |
| "grad_norm": 7781551.329745824, | |
| "learning_rate": 1.1469933184855234e-07, | |
| "logits/chosen": -2.3267343044281006, | |
| "logits/rejected": -2.3000550270080566, | |
| "logps/chosen": -114.78788757324219, | |
| "logps/rejected": -160.58119201660156, | |
| "loss": 117669.975, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.02355727180838585, | |
| "rewards/margins": 0.04111555963754654, | |
| "rewards/rejected": -0.06467284262180328, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7975951903807615, | |
| "grad_norm": 7705308.394496826, | |
| "learning_rate": 1.1247216035634744e-07, | |
| "logits/chosen": -2.384936571121216, | |
| "logits/rejected": -2.3428092002868652, | |
| "logps/chosen": -118.7773208618164, | |
| "logps/rejected": -167.65505981445312, | |
| "loss": 119291.925, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.03314110264182091, | |
| "rewards/margins": 0.03820453956723213, | |
| "rewards/rejected": -0.07134564965963364, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.8016032064128257, | |
| "grad_norm": 9748026.950946445, | |
| "learning_rate": 1.1024498886414254e-07, | |
| "logits/chosen": -2.3572840690612793, | |
| "logits/rejected": -2.372260570526123, | |
| "logps/chosen": -141.3115997314453, | |
| "logps/rejected": -194.31414794921875, | |
| "loss": 121163.925, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.03314133733510971, | |
| "rewards/margins": 0.04883214458823204, | |
| "rewards/rejected": -0.08197349309921265, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.8056112224448898, | |
| "grad_norm": 11371440.513765983, | |
| "learning_rate": 1.0801781737193763e-07, | |
| "logits/chosen": -2.407268524169922, | |
| "logits/rejected": -2.329662799835205, | |
| "logps/chosen": -109.80204010009766, | |
| "logps/rejected": -165.47967529296875, | |
| "loss": 119808.175, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.029940223321318626, | |
| "rewards/margins": 0.042202599346637726, | |
| "rewards/rejected": -0.0721428170800209, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.8096192384769539, | |
| "grad_norm": 11530967.267240841, | |
| "learning_rate": 1.0579064587973273e-07, | |
| "logits/chosen": -2.4649455547332764, | |
| "logits/rejected": -2.4669435024261475, | |
| "logps/chosen": -153.43203735351562, | |
| "logps/rejected": -207.6525421142578, | |
| "loss": 120230.175, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.033086683601140976, | |
| "rewards/margins": 0.044571831822395325, | |
| "rewards/rejected": -0.077658511698246, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.8136272545090181, | |
| "grad_norm": 9145691.093056194, | |
| "learning_rate": 1.0356347438752784e-07, | |
| "logits/chosen": -2.3883135318756104, | |
| "logits/rejected": -2.393925666809082, | |
| "logps/chosen": -121.42464447021484, | |
| "logps/rejected": -175.5780029296875, | |
| "loss": 118819.75, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.025960665196180344, | |
| "rewards/margins": 0.038529325276613235, | |
| "rewards/rejected": -0.06448998302221298, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.8176352705410822, | |
| "grad_norm": 9873094.955285586, | |
| "learning_rate": 1.0133630289532294e-07, | |
| "logits/chosen": -2.3716039657592773, | |
| "logits/rejected": -2.3830373287200928, | |
| "logps/chosen": -113.73304748535156, | |
| "logps/rejected": -154.63546752929688, | |
| "loss": 126263.6, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.027685221284627914, | |
| "rewards/margins": 0.03748108074069023, | |
| "rewards/rejected": -0.06516630947589874, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.8216432865731463, | |
| "grad_norm": 9115873.458954994, | |
| "learning_rate": 9.910913140311804e-08, | |
| "logits/chosen": -2.479027032852173, | |
| "logits/rejected": -2.490036964416504, | |
| "logps/chosen": -132.74807739257812, | |
| "logps/rejected": -184.6726531982422, | |
| "loss": 118780.575, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02712252177298069, | |
| "rewards/margins": 0.04764852300286293, | |
| "rewards/rejected": -0.07477104663848877, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.8256513026052105, | |
| "grad_norm": 8953461.219512891, | |
| "learning_rate": 9.688195991091313e-08, | |
| "logits/chosen": -2.465446949005127, | |
| "logits/rejected": -2.4427990913391113, | |
| "logps/chosen": -119.0552978515625, | |
| "logps/rejected": -161.92874145507812, | |
| "loss": 119633.0625, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.025605756789445877, | |
| "rewards/margins": 0.040582504123449326, | |
| "rewards/rejected": -0.0661882609128952, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.8296593186372746, | |
| "grad_norm": 7851765.489091888, | |
| "learning_rate": 9.465478841870823e-08, | |
| "logits/chosen": -2.3986716270446777, | |
| "logits/rejected": -2.411012887954712, | |
| "logps/chosen": -118.712646484375, | |
| "logps/rejected": -173.01612854003906, | |
| "loss": 119445.7875, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.024571005254983902, | |
| "rewards/margins": 0.04814226180315018, | |
| "rewards/rejected": -0.07271327078342438, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.8336673346693386, | |
| "grad_norm": 7783572.975882292, | |
| "learning_rate": 9.242761692650333e-08, | |
| "logits/chosen": -2.4087226390838623, | |
| "logits/rejected": -2.4295570850372314, | |
| "logps/chosen": -107.88726806640625, | |
| "logps/rejected": -147.86387634277344, | |
| "loss": 121158.875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.028421640396118164, | |
| "rewards/margins": 0.03798586130142212, | |
| "rewards/rejected": -0.06640749424695969, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.8376753507014028, | |
| "grad_norm": 13856515.926379297, | |
| "learning_rate": 9.020044543429844e-08, | |
| "logits/chosen": -2.3862414360046387, | |
| "logits/rejected": -2.3929479122161865, | |
| "logps/chosen": -128.18722534179688, | |
| "logps/rejected": -179.44863891601562, | |
| "loss": 118360.075, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.02913135662674904, | |
| "rewards/margins": 0.039204858243465424, | |
| "rewards/rejected": -0.06833621114492416, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.8416833667334669, | |
| "grad_norm": 7032800.434205612, | |
| "learning_rate": 8.797327394209354e-08, | |
| "logits/chosen": -2.3820009231567383, | |
| "logits/rejected": -2.3427934646606445, | |
| "logps/chosen": -117.68209075927734, | |
| "logps/rejected": -162.79541015625, | |
| "loss": 124612.8375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.024324778467416763, | |
| "rewards/margins": 0.03295541927218437, | |
| "rewards/rejected": -0.05728019401431084, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.845691382765531, | |
| "grad_norm": 6171252.074137294, | |
| "learning_rate": 8.574610244988864e-08, | |
| "logits/chosen": -2.397584915161133, | |
| "logits/rejected": -2.371406078338623, | |
| "logps/chosen": -109.43270111083984, | |
| "logps/rejected": -151.5046844482422, | |
| "loss": 119086.7625, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.02608482912182808, | |
| "rewards/margins": 0.03447579964995384, | |
| "rewards/rejected": -0.06056063249707222, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8496993987975952, | |
| "grad_norm": 10404482.46317683, | |
| "learning_rate": 8.351893095768374e-08, | |
| "logits/chosen": -2.363708019256592, | |
| "logits/rejected": -2.3750388622283936, | |
| "logps/chosen": -124.8462905883789, | |
| "logps/rejected": -160.50111389160156, | |
| "loss": 120726.7, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.025790056213736534, | |
| "rewards/margins": 0.0353570319712162, | |
| "rewards/rejected": -0.06114708259701729, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8537074148296593, | |
| "grad_norm": 10159471.548415452, | |
| "learning_rate": 8.129175946547884e-08, | |
| "logits/chosen": -2.40871262550354, | |
| "logits/rejected": -2.385307788848877, | |
| "logps/chosen": -123.74516296386719, | |
| "logps/rejected": -174.9013671875, | |
| "loss": 117929.3, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.02134835720062256, | |
| "rewards/margins": 0.053267043083906174, | |
| "rewards/rejected": -0.07461539655923843, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8577154308617234, | |
| "grad_norm": 9748215.648259088, | |
| "learning_rate": 7.906458797327394e-08, | |
| "logits/chosen": -2.3864364624023438, | |
| "logits/rejected": -2.384763479232788, | |
| "logps/chosen": -124.77516174316406, | |
| "logps/rejected": -187.81178283691406, | |
| "loss": 114719.2875, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.028009647503495216, | |
| "rewards/margins": 0.057968758046627045, | |
| "rewards/rejected": -0.08597840368747711, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8617234468937875, | |
| "grad_norm": 8214050.893974697, | |
| "learning_rate": 7.683741648106903e-08, | |
| "logits/chosen": -2.339771270751953, | |
| "logits/rejected": -2.3055481910705566, | |
| "logps/chosen": -122.35482025146484, | |
| "logps/rejected": -162.99026489257812, | |
| "loss": 123583.925, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.03496576473116875, | |
| "rewards/margins": 0.031746573746204376, | |
| "rewards/rejected": -0.06671233475208282, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8657314629258517, | |
| "grad_norm": 7550929.657836777, | |
| "learning_rate": 7.461024498886414e-08, | |
| "logits/chosen": -2.3592019081115723, | |
| "logits/rejected": -2.404470443725586, | |
| "logps/chosen": -107.17295837402344, | |
| "logps/rejected": -150.08053588867188, | |
| "loss": 119529.6875, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.024070553481578827, | |
| "rewards/margins": 0.035172443836927414, | |
| "rewards/rejected": -0.05924300104379654, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8697394789579158, | |
| "grad_norm": 9141394.945114018, | |
| "learning_rate": 7.238307349665924e-08, | |
| "logits/chosen": -2.3597967624664307, | |
| "logits/rejected": -2.3456203937530518, | |
| "logps/chosen": -132.2399139404297, | |
| "logps/rejected": -190.35842895507812, | |
| "loss": 117310.2625, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.030771303921937943, | |
| "rewards/margins": 0.04350388050079346, | |
| "rewards/rejected": -0.0742751806974411, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.87374749498998, | |
| "grad_norm": 9640801.577136654, | |
| "learning_rate": 7.015590200445435e-08, | |
| "logits/chosen": -2.315046548843384, | |
| "logits/rejected": -2.2832789421081543, | |
| "logps/chosen": -123.76663970947266, | |
| "logps/rejected": -180.56610107421875, | |
| "loss": 122200.8125, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.02636173740029335, | |
| "rewards/margins": 0.05408860370516777, | |
| "rewards/rejected": -0.08045034110546112, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8777555110220441, | |
| "grad_norm": 9115013.555540964, | |
| "learning_rate": 6.792873051224945e-08, | |
| "logits/chosen": -2.341780662536621, | |
| "logits/rejected": -2.303041934967041, | |
| "logps/chosen": -120.33302307128906, | |
| "logps/rejected": -179.38143920898438, | |
| "loss": 119841.1375, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.028819095343351364, | |
| "rewards/margins": 0.05842950940132141, | |
| "rewards/rejected": -0.08724860846996307, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8817635270541082, | |
| "grad_norm": 9287669.565738013, | |
| "learning_rate": 6.570155902004454e-08, | |
| "logits/chosen": -2.3970108032226562, | |
| "logits/rejected": -2.3846933841705322, | |
| "logps/chosen": -134.4124755859375, | |
| "logps/rejected": -194.52996826171875, | |
| "loss": 113952.85, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.030755961313843727, | |
| "rewards/margins": 0.059903584420681, | |
| "rewards/rejected": -0.09065954387187958, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8857715430861723, | |
| "grad_norm": 8780636.26287871, | |
| "learning_rate": 6.347438752783964e-08, | |
| "logits/chosen": -2.3586363792419434, | |
| "logits/rejected": -2.341787815093994, | |
| "logps/chosen": -121.4832763671875, | |
| "logps/rejected": -174.41322326660156, | |
| "loss": 125072.6125, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.034476399421691895, | |
| "rewards/margins": 0.04450554400682449, | |
| "rewards/rejected": -0.07898194342851639, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8897795591182365, | |
| "grad_norm": 11734592.621806614, | |
| "learning_rate": 6.124721603563474e-08, | |
| "logits/chosen": -2.3850929737091064, | |
| "logits/rejected": -2.3612587451934814, | |
| "logps/chosen": -127.06380462646484, | |
| "logps/rejected": -176.50833129882812, | |
| "loss": 127903.1, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.03346968814730644, | |
| "rewards/margins": 0.045090578496456146, | |
| "rewards/rejected": -0.07856027781963348, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8937875751503006, | |
| "grad_norm": 9158318.910190664, | |
| "learning_rate": 5.902004454342984e-08, | |
| "logits/chosen": -2.3465323448181152, | |
| "logits/rejected": -2.3347389698028564, | |
| "logps/chosen": -114.64030456542969, | |
| "logps/rejected": -158.58358764648438, | |
| "loss": 119119.75, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.03357522934675217, | |
| "rewards/margins": 0.03692323714494705, | |
| "rewards/rejected": -0.07049846649169922, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8977955911823647, | |
| "grad_norm": 9433469.034726756, | |
| "learning_rate": 5.679287305122494e-08, | |
| "logits/chosen": -2.3580026626586914, | |
| "logits/rejected": -2.3229854106903076, | |
| "logps/chosen": -145.0024871826172, | |
| "logps/rejected": -185.17398071289062, | |
| "loss": 123208.3625, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.0344650074839592, | |
| "rewards/margins": 0.03345141187310219, | |
| "rewards/rejected": -0.06791641563177109, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.9018036072144289, | |
| "grad_norm": 10459560.929278648, | |
| "learning_rate": 5.456570155902004e-08, | |
| "logits/chosen": -2.2590279579162598, | |
| "logits/rejected": -2.2098376750946045, | |
| "logps/chosen": -125.89430236816406, | |
| "logps/rejected": -180.64013671875, | |
| "loss": 119874.9125, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.030151482671499252, | |
| "rewards/margins": 0.0487741082906723, | |
| "rewards/rejected": -0.07892559468746185, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.905811623246493, | |
| "grad_norm": 9239966.29275951, | |
| "learning_rate": 5.233853006681514e-08, | |
| "logits/chosen": -2.219543933868408, | |
| "logits/rejected": -2.1673264503479004, | |
| "logps/chosen": -115.83148193359375, | |
| "logps/rejected": -181.96237182617188, | |
| "loss": 116310.4625, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.030166417360305786, | |
| "rewards/margins": 0.05468549206852913, | |
| "rewards/rejected": -0.08485190570354462, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.9098196392785571, | |
| "grad_norm": 7137384.451143832, | |
| "learning_rate": 5.0111358574610243e-08, | |
| "logits/chosen": -2.3517508506774902, | |
| "logits/rejected": -2.328963279724121, | |
| "logps/chosen": -124.4570541381836, | |
| "logps/rejected": -155.75506591796875, | |
| "loss": 125401.625, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03177972882986069, | |
| "rewards/margins": 0.028021136298775673, | |
| "rewards/rejected": -0.05980087071657181, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.9138276553106213, | |
| "grad_norm": 9374102.140787963, | |
| "learning_rate": 4.7884187082405345e-08, | |
| "logits/chosen": -2.36897611618042, | |
| "logits/rejected": -2.351210832595825, | |
| "logps/chosen": -121.159423828125, | |
| "logps/rejected": -162.73583984375, | |
| "loss": 122636.2125, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.027210911735892296, | |
| "rewards/margins": 0.037821024656295776, | |
| "rewards/rejected": -0.06503193080425262, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.9178356713426854, | |
| "grad_norm": 10634549.480236543, | |
| "learning_rate": 4.5657015590200446e-08, | |
| "logits/chosen": -2.3483309745788574, | |
| "logits/rejected": -2.3582046031951904, | |
| "logps/chosen": -129.0653839111328, | |
| "logps/rejected": -160.6099853515625, | |
| "loss": 120660.725, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02793022058904171, | |
| "rewards/margins": 0.028678078204393387, | |
| "rewards/rejected": -0.056608300656080246, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.9218436873747495, | |
| "grad_norm": 9025337.740427457, | |
| "learning_rate": 4.342984409799554e-08, | |
| "logits/chosen": -2.3706603050231934, | |
| "logits/rejected": -2.369664192199707, | |
| "logps/chosen": -127.57081604003906, | |
| "logps/rejected": -188.29193115234375, | |
| "loss": 115251.525, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.02242148667573929, | |
| "rewards/margins": 0.0556727834045887, | |
| "rewards/rejected": -0.07809427380561829, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.9258517034068137, | |
| "grad_norm": 7848051.255311885, | |
| "learning_rate": 4.120267260579064e-08, | |
| "logits/chosen": -2.3117566108703613, | |
| "logits/rejected": -2.3098156452178955, | |
| "logps/chosen": -108.4315185546875, | |
| "logps/rejected": -150.93917846679688, | |
| "loss": 118349.0, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.029692724347114563, | |
| "rewards/margins": 0.03289476037025452, | |
| "rewards/rejected": -0.06258748471736908, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.9298597194388778, | |
| "grad_norm": 8752220.280733073, | |
| "learning_rate": 3.897550111358574e-08, | |
| "logits/chosen": -2.440371036529541, | |
| "logits/rejected": -2.396395444869995, | |
| "logps/chosen": -118.261962890625, | |
| "logps/rejected": -177.1492462158203, | |
| "loss": 118122.25, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.02859870158135891, | |
| "rewards/margins": 0.04976039007306099, | |
| "rewards/rejected": -0.07835908979177475, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.9338677354709419, | |
| "grad_norm": 8570641.436970409, | |
| "learning_rate": 3.6748329621380844e-08, | |
| "logits/chosen": -2.3024837970733643, | |
| "logits/rejected": -2.2734124660491943, | |
| "logps/chosen": -100.15013122558594, | |
| "logps/rejected": -150.4217987060547, | |
| "loss": 121363.625, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.017847072333097458, | |
| "rewards/margins": 0.041332632303237915, | |
| "rewards/rejected": -0.05917970463633537, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.9378757515030061, | |
| "grad_norm": 10125338.403382758, | |
| "learning_rate": 3.4521158129175945e-08, | |
| "logits/chosen": -2.3241610527038574, | |
| "logits/rejected": -2.288438081741333, | |
| "logps/chosen": -140.2849578857422, | |
| "logps/rejected": -185.48062133789062, | |
| "loss": 116049.775, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.023854615166783333, | |
| "rewards/margins": 0.04641326889395714, | |
| "rewards/rejected": -0.07026788592338562, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.9418837675350702, | |
| "grad_norm": 8270337.589395198, | |
| "learning_rate": 3.2293986636971046e-08, | |
| "logits/chosen": -2.3693883419036865, | |
| "logits/rejected": -2.329385995864868, | |
| "logps/chosen": -129.80262756347656, | |
| "logps/rejected": -171.2855224609375, | |
| "loss": 126379.775, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.03462721034884453, | |
| "rewards/margins": 0.032999541610479355, | |
| "rewards/rejected": -0.06762675940990448, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.9458917835671342, | |
| "grad_norm": 9891605.681991456, | |
| "learning_rate": 3.006681514476615e-08, | |
| "logits/chosen": -2.339646339416504, | |
| "logits/rejected": -2.3495984077453613, | |
| "logps/chosen": -112.51835632324219, | |
| "logps/rejected": -153.22071838378906, | |
| "loss": 119129.5875, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.029641568660736084, | |
| "rewards/margins": 0.036299534142017365, | |
| "rewards/rejected": -0.06594111025333405, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.9498997995991983, | |
| "grad_norm": 8002327.473093823, | |
| "learning_rate": 2.783964365256125e-08, | |
| "logits/chosen": -2.2044544219970703, | |
| "logits/rejected": -2.1942667961120605, | |
| "logps/chosen": -125.50931549072266, | |
| "logps/rejected": -182.1253204345703, | |
| "loss": 120835.975, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.030479159206151962, | |
| "rewards/margins": 0.04504828527569771, | |
| "rewards/rejected": -0.07552744448184967, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9539078156312625, | |
| "grad_norm": 9473260.065683817, | |
| "learning_rate": 2.5612472160356346e-08, | |
| "logits/chosen": -2.3253302574157715, | |
| "logits/rejected": -2.30558443069458, | |
| "logps/chosen": -120.88480377197266, | |
| "logps/rejected": -174.08871459960938, | |
| "loss": 116823.8375, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.030502652749419212, | |
| "rewards/margins": 0.04683176428079605, | |
| "rewards/rejected": -0.07733441144227982, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9579158316633266, | |
| "grad_norm": 12154341.360154865, | |
| "learning_rate": 2.3385300668151448e-08, | |
| "logits/chosen": -2.308772563934326, | |
| "logits/rejected": -2.3092708587646484, | |
| "logps/chosen": -114.74493408203125, | |
| "logps/rejected": -176.45138549804688, | |
| "loss": 115003.975, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.03354664891958237, | |
| "rewards/margins": 0.0526542142033577, | |
| "rewards/rejected": -0.08620086312294006, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.9619238476953907, | |
| "grad_norm": 7519811.285504107, | |
| "learning_rate": 2.1158129175946545e-08, | |
| "logits/chosen": -2.312025785446167, | |
| "logits/rejected": -2.3464319705963135, | |
| "logps/chosen": -119.38179779052734, | |
| "logps/rejected": -166.91592407226562, | |
| "loss": 122030.7125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02894437685608864, | |
| "rewards/margins": 0.045145101845264435, | |
| "rewards/rejected": -0.07408948242664337, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9659318637274549, | |
| "grad_norm": 8883024.450495958, | |
| "learning_rate": 1.8930957683741647e-08, | |
| "logits/chosen": -2.344897747039795, | |
| "logits/rejected": -2.285876512527466, | |
| "logps/chosen": -111.0980224609375, | |
| "logps/rejected": -164.24978637695312, | |
| "loss": 121476.0125, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.026049071922898293, | |
| "rewards/margins": 0.048600487411022186, | |
| "rewards/rejected": -0.07464955747127533, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.969939879759519, | |
| "grad_norm": 10097197.568430113, | |
| "learning_rate": 1.6703786191536748e-08, | |
| "logits/chosen": -2.388754367828369, | |
| "logits/rejected": -2.3738179206848145, | |
| "logps/chosen": -125.5340576171875, | |
| "logps/rejected": -173.31170654296875, | |
| "loss": 121610.525, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.02290264144539833, | |
| "rewards/margins": 0.04468691721558571, | |
| "rewards/rejected": -0.06758955866098404, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9739478957915831, | |
| "grad_norm": 11378129.987507869, | |
| "learning_rate": 1.4476614699331847e-08, | |
| "logits/chosen": -2.3605947494506836, | |
| "logits/rejected": -2.3231589794158936, | |
| "logps/chosen": -117.72758483886719, | |
| "logps/rejected": -183.6721649169922, | |
| "loss": 119195.675, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.027614343911409378, | |
| "rewards/margins": 0.05597345903515816, | |
| "rewards/rejected": -0.08358780294656754, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9779559118236473, | |
| "grad_norm": 8462760.728667326, | |
| "learning_rate": 1.2249443207126947e-08, | |
| "logits/chosen": -2.2801132202148438, | |
| "logits/rejected": -2.292315721511841, | |
| "logps/chosen": -127.86152648925781, | |
| "logps/rejected": -175.5904083251953, | |
| "loss": 124485.325, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.03231586143374443, | |
| "rewards/margins": 0.03901532292366028, | |
| "rewards/rejected": -0.07133118063211441, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9819639278557114, | |
| "grad_norm": 9243740.732393652, | |
| "learning_rate": 1.002227171492205e-08, | |
| "logits/chosen": -2.277127981185913, | |
| "logits/rejected": -2.2810211181640625, | |
| "logps/chosen": -117.18321228027344, | |
| "logps/rejected": -148.31886291503906, | |
| "loss": 127954.8875, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.025695014744997025, | |
| "rewards/margins": 0.026529842987656593, | |
| "rewards/rejected": -0.052224863320589066, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9859719438877755, | |
| "grad_norm": 9218086.60683555, | |
| "learning_rate": 7.79510022271715e-09, | |
| "logits/chosen": -2.361515998840332, | |
| "logits/rejected": -2.3449158668518066, | |
| "logps/chosen": -115.47230529785156, | |
| "logps/rejected": -134.2152099609375, | |
| "loss": 121513.8, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02808554843068123, | |
| "rewards/margins": 0.022291336208581924, | |
| "rewards/rejected": -0.05037688463926315, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9899799599198397, | |
| "grad_norm": 8767440.113062855, | |
| "learning_rate": 5.5679287305122495e-09, | |
| "logits/chosen": -2.3940796852111816, | |
| "logits/rejected": -2.373922824859619, | |
| "logps/chosen": -146.56610107421875, | |
| "logps/rejected": -181.4602508544922, | |
| "loss": 121378.2375, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.030033543705940247, | |
| "rewards/margins": 0.033586207777261734, | |
| "rewards/rejected": -0.06361975520849228, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9939879759519038, | |
| "grad_norm": 10436515.838738332, | |
| "learning_rate": 3.3407572383073495e-09, | |
| "logits/chosen": -2.4080631732940674, | |
| "logits/rejected": -2.3511130809783936, | |
| "logps/chosen": -110.88321685791016, | |
| "logps/rejected": -157.0704803466797, | |
| "loss": 123335.75, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.02597566321492195, | |
| "rewards/margins": 0.04258622229099274, | |
| "rewards/rejected": -0.06856188923120499, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9979959919839679, | |
| "grad_norm": 12722707.579726782, | |
| "learning_rate": 1.1135857461024498e-09, | |
| "logits/chosen": -2.2917561531066895, | |
| "logits/rejected": -2.238448143005371, | |
| "logps/chosen": -122.59525299072266, | |
| "logps/rejected": -173.32418823242188, | |
| "loss": 120410.5375, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.02627197466790676, | |
| "rewards/margins": 0.046644873917102814, | |
| "rewards/rejected": -0.07291685789823532, | |
| "step": 2490 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2495, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |