| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9871794871794872, | |
| "eval_steps": 500, | |
| "global_step": 699, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.021367521367521368, | |
| "grad_norm": 16.125, | |
| "learning_rate": 1.7556090538745385e-06, | |
| "logits/chosen": -3.5722389221191406, | |
| "logits/rejected": -3.5034377574920654, | |
| "logps/chosen": -41.095054626464844, | |
| "logps/rejected": -79.83882141113281, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.3500000238418579, | |
| "rewards/chosen": 0.0014523781137540936, | |
| "rewards/margins": 0.001936142100021243, | |
| "rewards/rejected": -0.0004837641608901322, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.042735042735042736, | |
| "grad_norm": 16.75, | |
| "learning_rate": 3.950120371217711e-06, | |
| "logits/chosen": -3.586623430252075, | |
| "logits/rejected": -3.506187915802002, | |
| "logps/chosen": -40.016441345214844, | |
| "logps/rejected": -78.24286651611328, | |
| "loss": 0.6796, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.002693606074899435, | |
| "rewards/margins": 0.027483653277158737, | |
| "rewards/rejected": -0.03017725981771946, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 14.75, | |
| "learning_rate": 6.144631688560886e-06, | |
| "logits/chosen": -3.5991673469543457, | |
| "logits/rejected": -3.50789213180542, | |
| "logps/chosen": -39.836097717285156, | |
| "logps/rejected": -81.42815399169922, | |
| "loss": 0.6306, | |
| "rewards/accuracies": 0.9916666746139526, | |
| "rewards/chosen": -0.006502463016659021, | |
| "rewards/margins": 0.1313387155532837, | |
| "rewards/rejected": -0.13784119486808777, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 13.625, | |
| "learning_rate": 8.339143005904057e-06, | |
| "logits/chosen": -3.5530943870544434, | |
| "logits/rejected": -3.492051601409912, | |
| "logps/chosen": -39.75938034057617, | |
| "logps/rejected": -80.72886657714844, | |
| "loss": 0.5434, | |
| "rewards/accuracies": 0.9791666865348816, | |
| "rewards/chosen": -0.015247734263539314, | |
| "rewards/margins": 0.33922019600868225, | |
| "rewards/rejected": -0.3544679284095764, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.10683760683760683, | |
| "grad_norm": 11.1875, | |
| "learning_rate": 1.0533654323247232e-05, | |
| "logits/chosen": -3.512582302093506, | |
| "logits/rejected": -3.4719951152801514, | |
| "logps/chosen": -39.996891021728516, | |
| "logps/rejected": -85.54742431640625, | |
| "loss": 0.3961, | |
| "rewards/accuracies": 0.98333340883255, | |
| "rewards/chosen": -0.04132762551307678, | |
| "rewards/margins": 0.7773466110229492, | |
| "rewards/rejected": -0.8186742067337036, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 7.5625, | |
| "learning_rate": 1.2728165640590407e-05, | |
| "logits/chosen": -3.4401345252990723, | |
| "logits/rejected": -3.4451375007629395, | |
| "logps/chosen": -42.438926696777344, | |
| "logps/rejected": -93.06343078613281, | |
| "loss": 0.2584, | |
| "rewards/accuracies": 0.9958332777023315, | |
| "rewards/chosen": -0.13156814873218536, | |
| "rewards/margins": 1.4302090406417847, | |
| "rewards/rejected": -1.561777114868164, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.14957264957264957, | |
| "grad_norm": 4.28125, | |
| "learning_rate": 1.4922676957933578e-05, | |
| "logits/chosen": -3.220163345336914, | |
| "logits/rejected": -3.2657477855682373, | |
| "logps/chosen": -44.191307067871094, | |
| "logps/rejected": -108.76808166503906, | |
| "loss": 0.1195, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4397502541542053, | |
| "rewards/margins": 2.7580618858337402, | |
| "rewards/rejected": -3.19781231880188, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 1.5360556888469565e-05, | |
| "logits/chosen": -2.7749853134155273, | |
| "logits/rejected": -2.807795286178589, | |
| "logps/chosen": -52.53513717651367, | |
| "logps/rejected": -143.27357482910156, | |
| "loss": 0.0426, | |
| "rewards/accuracies": 0.9916666746139526, | |
| "rewards/chosen": -1.2650867700576782, | |
| "rewards/margins": 5.236789226531982, | |
| "rewards/rejected": -6.501875877380371, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 1.535640428282884e-05, | |
| "logits/chosen": -2.3302321434020996, | |
| "logits/rejected": -2.289696216583252, | |
| "logps/chosen": -51.40728759765625, | |
| "logps/rejected": -162.12364196777344, | |
| "loss": 0.0168, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.279262900352478, | |
| "rewards/margins": 7.274726867675781, | |
| "rewards/rejected": -8.553990364074707, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.21367521367521367, | |
| "grad_norm": 6.5, | |
| "learning_rate": 1.5349059809872097e-05, | |
| "logits/chosen": -1.9786951541900635, | |
| "logits/rejected": -1.9170547723770142, | |
| "logps/chosen": -57.05157470703125, | |
| "logps/rejected": -180.14271545410156, | |
| "loss": 0.0191, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.664361596107483, | |
| "rewards/margins": 8.5733003616333, | |
| "rewards/rejected": -10.237661361694336, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.23504273504273504, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.5338527542732884e-05, | |
| "logits/chosen": -1.976489782333374, | |
| "logits/rejected": -1.909419059753418, | |
| "logps/chosen": -58.60235595703125, | |
| "logps/rejected": -196.4000701904297, | |
| "loss": 0.0097, | |
| "rewards/accuracies": 0.9958332777023315, | |
| "rewards/chosen": -1.8706943988800049, | |
| "rewards/margins": 9.873316764831543, | |
| "rewards/rejected": -11.744011878967285, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 3.234375, | |
| "learning_rate": 1.532481332244717e-05, | |
| "logits/chosen": -1.9295637607574463, | |
| "logits/rejected": -1.817983627319336, | |
| "logps/chosen": -57.39198684692383, | |
| "logps/rejected": -199.2140350341797, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8252334594726562, | |
| "rewards/margins": 10.293539047241211, | |
| "rewards/rejected": -12.11877155303955, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 9.125, | |
| "learning_rate": 1.5307924754713968e-05, | |
| "logits/chosen": -1.9402471780776978, | |
| "logits/rejected": -1.8663572072982788, | |
| "logps/chosen": -59.858306884765625, | |
| "logps/rejected": -196.60189819335938, | |
| "loss": 0.0108, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.9207541942596436, | |
| "rewards/margins": 9.978109359741211, | |
| "rewards/rejected": -11.89886474609375, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.29914529914529914, | |
| "grad_norm": 0.298828125, | |
| "learning_rate": 1.528787120567736e-05, | |
| "logits/chosen": -2.046313762664795, | |
| "logits/rejected": -1.9556039571762085, | |
| "logps/chosen": -54.13606643676758, | |
| "logps/rejected": -192.9593963623047, | |
| "loss": 0.0026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4830687046051025, | |
| "rewards/margins": 10.119759559631348, | |
| "rewards/rejected": -11.602827072143555, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 1.526466379673215e-05, | |
| "logits/chosen": -2.0463297367095947, | |
| "logits/rejected": -1.9465014934539795, | |
| "logps/chosen": -53.60888671875, | |
| "logps/rejected": -200.31202697753906, | |
| "loss": 0.0074, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4517700672149658, | |
| "rewards/margins": 10.894501686096191, | |
| "rewards/rejected": -12.346272468566895, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 14.0, | |
| "learning_rate": 1.5238315398356126e-05, | |
| "logits/chosen": -1.9893850088119507, | |
| "logits/rejected": -1.8696527481079102, | |
| "logps/chosen": -55.793907165527344, | |
| "logps/rejected": -205.0140838623047, | |
| "loss": 0.0099, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.6301085948944092, | |
| "rewards/margins": 11.256315231323242, | |
| "rewards/rejected": -12.88642406463623, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.36324786324786323, | |
| "grad_norm": 0.040283203125, | |
| "learning_rate": 1.5208840622972272e-05, | |
| "logits/chosen": -1.9942238330841064, | |
| "logits/rejected": -1.8389371633529663, | |
| "logps/chosen": -60.305450439453125, | |
| "logps/rejected": -209.89016723632812, | |
| "loss": 0.0228, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.8958022594451904, | |
| "rewards/margins": 11.26887321472168, | |
| "rewards/rejected": -13.164674758911133, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 0.68359375, | |
| "learning_rate": 1.5176255816844948e-05, | |
| "logits/chosen": -1.958398461341858, | |
| "logits/rejected": -1.7746648788452148, | |
| "logps/chosen": -53.41706466674805, | |
| "logps/rejected": -208.37869262695312, | |
| "loss": 0.0067, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.439720869064331, | |
| "rewards/margins": 11.646267890930176, | |
| "rewards/rejected": -13.08598804473877, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.405982905982906, | |
| "grad_norm": 0.67578125, | |
| "learning_rate": 1.5140579051014502e-05, | |
| "logits/chosen": -1.9176127910614014, | |
| "logits/rejected": -1.6993322372436523, | |
| "logps/chosen": -57.2091178894043, | |
| "logps/rejected": -224.92782592773438, | |
| "loss": 0.0026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.716392159461975, | |
| "rewards/margins": 12.696627616882324, | |
| "rewards/rejected": -14.413020133972168, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 1.5101830111275334e-05, | |
| "logits/chosen": -1.874871015548706, | |
| "logits/rejected": -1.682807207107544, | |
| "logps/chosen": -63.0499267578125, | |
| "logps/rejected": -215.8572235107422, | |
| "loss": 0.0118, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.3144705295562744, | |
| "rewards/margins": 11.472696304321289, | |
| "rewards/rejected": -13.787165641784668, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 4.21875, | |
| "learning_rate": 1.5060030487203004e-05, | |
| "logits/chosen": -1.8294957876205444, | |
| "logits/rejected": -1.583496332168579, | |
| "logps/chosen": -67.67689514160156, | |
| "logps/rejected": -231.562744140625, | |
| "loss": 0.006, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.7576889991760254, | |
| "rewards/margins": 12.489922523498535, | |
| "rewards/rejected": -15.247611999511719, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.4700854700854701, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 1.501520336023643e-05, | |
| "logits/chosen": -1.7654807567596436, | |
| "logits/rejected": -1.4742016792297363, | |
| "logps/chosen": -66.52511596679688, | |
| "logps/rejected": -227.6389923095703, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.638422966003418, | |
| "rewards/margins": 12.420553207397461, | |
| "rewards/rejected": -15.058975219726562, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.49145299145299143, | |
| "grad_norm": 1.7421875, | |
| "learning_rate": 1.4967373590821828e-05, | |
| "logits/chosen": -1.7195736169815063, | |
| "logits/rejected": -1.4602675437927246, | |
| "logps/chosen": -63.84660720825195, | |
| "logps/rejected": -229.90017700195312, | |
| "loss": 0.0106, | |
| "rewards/accuracies": 0.9916666746139526, | |
| "rewards/chosen": -2.3248353004455566, | |
| "rewards/margins": 12.83845329284668, | |
| "rewards/rejected": -15.163289070129395, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 0.134765625, | |
| "learning_rate": 1.491656770462546e-05, | |
| "logits/chosen": -1.617491364479065, | |
| "logits/rejected": -1.2875694036483765, | |
| "logps/chosen": -59.133209228515625, | |
| "logps/rejected": -232.18191528320312, | |
| "loss": 0.0038, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9275062084197998, | |
| "rewards/margins": 13.423171997070312, | |
| "rewards/rejected": -15.350679397583008, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5341880341880342, | |
| "grad_norm": 11.0, | |
| "learning_rate": 1.4862813877822923e-05, | |
| "logits/chosen": -1.6853389739990234, | |
| "logits/rejected": -1.3519870042800903, | |
| "logps/chosen": -58.672515869140625, | |
| "logps/rejected": -232.4235382080078, | |
| "loss": 0.0091, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.8344595432281494, | |
| "rewards/margins": 13.702409744262695, | |
| "rewards/rejected": -15.53686809539795, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 1.4806141921473063e-05, | |
| "logits/chosen": -1.711216926574707, | |
| "logits/rejected": -1.331209421157837, | |
| "logps/chosen": -58.75749969482422, | |
| "logps/rejected": -241.5647430419922, | |
| "loss": 0.0058, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8602139949798584, | |
| "rewards/margins": 14.427963256835938, | |
| "rewards/rejected": -16.288179397583008, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 0.0157470703125, | |
| "learning_rate": 1.4746583264985202e-05, | |
| "logits/chosen": -1.73836350440979, | |
| "logits/rejected": -1.358798623085022, | |
| "logps/chosen": -57.656578063964844, | |
| "logps/rejected": -245.814697265625, | |
| "loss": 0.0054, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.727595329284668, | |
| "rewards/margins": 14.943672180175781, | |
| "rewards/rejected": -16.671268463134766, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 0.017578125, | |
| "learning_rate": 1.468417093868888e-05, | |
| "logits/chosen": -1.7839100360870361, | |
| "logits/rejected": -1.4424632787704468, | |
| "logps/chosen": -55.03651809692383, | |
| "logps/rejected": -240.14572143554688, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.5889627933502197, | |
| "rewards/margins": 14.578252792358398, | |
| "rewards/rejected": -16.16721534729004, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.6196581196581197, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 1.4618939555515721e-05, | |
| "logits/chosen": -1.7428079843521118, | |
| "logits/rejected": -1.4061057567596436, | |
| "logps/chosen": -59.45949172973633, | |
| "logps/rejected": -239.75048828125, | |
| "loss": 0.0028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9328705072402954, | |
| "rewards/margins": 14.28913402557373, | |
| "rewards/rejected": -16.222003936767578, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 0.00933837890625, | |
| "learning_rate": 1.455092529180363e-05, | |
| "logits/chosen": -1.7827781438827515, | |
| "logits/rejected": -1.4337228536605835, | |
| "logps/chosen": -57.58295440673828, | |
| "logps/rejected": -243.2423553466797, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8888952732086182, | |
| "rewards/margins": 14.729533195495605, | |
| "rewards/rejected": -16.618427276611328, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.6623931623931624, | |
| "grad_norm": 1.1171875, | |
| "learning_rate": 1.4480165867233946e-05, | |
| "logits/chosen": -1.7574710845947266, | |
| "logits/rejected": -1.4313023090362549, | |
| "logps/chosen": -61.97917938232422, | |
| "logps/rejected": -249.78890991210938, | |
| "loss": 0.0062, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.226684093475342, | |
| "rewards/margins": 14.859643936157227, | |
| "rewards/rejected": -17.086326599121094, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 2.59375, | |
| "learning_rate": 1.440670052391267e-05, | |
| "logits/chosen": -1.776049017906189, | |
| "logits/rejected": -1.4136337041854858, | |
| "logps/chosen": -58.725502014160156, | |
| "logps/rejected": -239.2755126953125, | |
| "loss": 0.0046, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0192863941192627, | |
| "rewards/margins": 14.30653190612793, | |
| "rewards/rejected": -16.32581901550293, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 1.7734375, | |
| "learning_rate": 1.4330570004607398e-05, | |
| "logits/chosen": -1.8287827968597412, | |
| "logits/rejected": -1.4543850421905518, | |
| "logps/chosen": -57.99534225463867, | |
| "logps/rejected": -249.4701690673828, | |
| "loss": 0.0035, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.8924716711044312, | |
| "rewards/margins": 15.258901596069336, | |
| "rewards/rejected": -17.15137481689453, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.7264957264957265, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 1.4251816530151986e-05, | |
| "logits/chosen": -1.7740707397460938, | |
| "logits/rejected": -1.4325586557388306, | |
| "logps/chosen": -60.86432647705078, | |
| "logps/rejected": -249.397216796875, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.059633255004883, | |
| "rewards/margins": 15.068583488464355, | |
| "rewards/rejected": -17.128215789794922, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7478632478632479, | |
| "grad_norm": 0.060546875, | |
| "learning_rate": 1.4170483776031526e-05, | |
| "logits/chosen": -1.7101682424545288, | |
| "logits/rejected": -1.3118056058883667, | |
| "logps/chosen": -60.89410400390625, | |
| "logps/rejected": -252.2559814453125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1261119842529297, | |
| "rewards/margins": 15.250958442687988, | |
| "rewards/rejected": -17.377071380615234, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 0.031494140625, | |
| "learning_rate": 1.4086616848160574e-05, | |
| "logits/chosen": -1.6894880533218384, | |
| "logits/rejected": -1.2855875492095947, | |
| "logps/chosen": -66.87565612792969, | |
| "logps/rejected": -245.99557495117188, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7102391719818115, | |
| "rewards/margins": 14.112167358398438, | |
| "rewards/rejected": -16.822406768798828, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7905982905982906, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 1.4000262257868096e-05, | |
| "logits/chosen": -1.6548315286636353, | |
| "logits/rejected": -1.192333459854126, | |
| "logps/chosen": -64.77526092529297, | |
| "logps/rejected": -249.92196655273438, | |
| "loss": 0.0027, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.546926975250244, | |
| "rewards/margins": 14.788670539855957, | |
| "rewards/rejected": -17.33559799194336, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.811965811965812, | |
| "grad_norm": 1.1953125, | |
| "learning_rate": 1.3911467896102994e-05, | |
| "logits/chosen": -1.6022329330444336, | |
| "logits/rejected": -1.1850754022598267, | |
| "logps/chosen": -60.396514892578125, | |
| "logps/rejected": -247.5742645263672, | |
| "loss": 0.0054, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.1046481132507324, | |
| "rewards/margins": 15.000350952148438, | |
| "rewards/rejected": -17.104999542236328, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 1.3820283006874503e-05, | |
| "logits/chosen": -1.563820719718933, | |
| "logits/rejected": -1.137289047241211, | |
| "logps/chosen": -65.57173156738281, | |
| "logps/rejected": -252.38955688476562, | |
| "loss": 0.0085, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.3488571643829346, | |
| "rewards/margins": 15.110745429992676, | |
| "rewards/rejected": -17.45960235595703, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 0.1279296875, | |
| "learning_rate": 1.372675815994221e-05, | |
| "logits/chosen": -1.4945417642593384, | |
| "logits/rejected": -1.038334846496582, | |
| "logps/chosen": -54.2162971496582, | |
| "logps/rejected": -254.0222625732422, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.6657600402832031, | |
| "rewards/margins": 16.079242706298828, | |
| "rewards/rejected": -17.745004653930664, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8760683760683761, | |
| "grad_norm": 14.625, | |
| "learning_rate": 1.3630945222770829e-05, | |
| "logits/chosen": -1.5167819261550903, | |
| "logits/rejected": -1.0430529117584229, | |
| "logps/chosen": -60.5411262512207, | |
| "logps/rejected": -259.63555908203125, | |
| "loss": 0.0192, | |
| "rewards/accuracies": 0.9916666746139526, | |
| "rewards/chosen": -2.0362796783447266, | |
| "rewards/margins": 16.097753524780273, | |
| "rewards/rejected": -18.134033203125, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 0.2119140625, | |
| "learning_rate": 1.3532897331765301e-05, | |
| "logits/chosen": -1.5572597980499268, | |
| "logits/rejected": -1.053264856338501, | |
| "logps/chosen": -59.34284591674805, | |
| "logps/rejected": -260.30206298828125, | |
| "loss": 0.0151, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -1.9360477924346924, | |
| "rewards/margins": 16.303897857666016, | |
| "rewards/rejected": -18.239948272705078, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.9188034188034188, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 1.3432668862802134e-05, | |
| "logits/chosen": -1.4950945377349854, | |
| "logits/rejected": -1.014696478843689, | |
| "logps/chosen": -57.9535026550293, | |
| "logps/rejected": -256.272705078125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7511317729949951, | |
| "rewards/margins": 15.973749160766602, | |
| "rewards/rejected": -17.724878311157227, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 1.3330315401073371e-05, | |
| "logits/chosen": -1.5073899030685425, | |
| "logits/rejected": -1.0442817211151123, | |
| "logps/chosen": -59.66225051879883, | |
| "logps/rejected": -249.38534545898438, | |
| "loss": 0.002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.872859001159668, | |
| "rewards/margins": 15.302103042602539, | |
| "rewards/rejected": -17.17496109008789, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 2.625, | |
| "learning_rate": 1.3225893710259887e-05, | |
| "logits/chosen": -1.3290693759918213, | |
| "logits/rejected": -0.8046108484268188, | |
| "logps/chosen": -60.98704147338867, | |
| "logps/rejected": -252.48495483398438, | |
| "loss": 0.0065, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.179378032684326, | |
| "rewards/margins": 15.35717487335205, | |
| "rewards/rejected": -17.53655433654785, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9829059829059829, | |
| "grad_norm": 1.359375, | |
| "learning_rate": 1.3119461701051105e-05, | |
| "logits/chosen": -1.4031749963760376, | |
| "logits/rejected": -0.8651553988456726, | |
| "logps/chosen": -57.99671173095703, | |
| "logps/rejected": -251.2525634765625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.0349326133728027, | |
| "rewards/margins": 15.621841430664062, | |
| "rewards/rejected": -17.656774520874023, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9957264957264957, | |
| "eval_logits/chosen": -1.4502625465393066, | |
| "eval_logits/rejected": -0.9043333530426025, | |
| "eval_logps/chosen": -61.05683517456055, | |
| "eval_logps/rejected": -259.54730224609375, | |
| "eval_loss": 0.0003871396475005895, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -2.1613101959228516, | |
| "eval_rewards/margins": 15.991730690002441, | |
| "eval_rewards/rejected": -18.15304183959961, | |
| "eval_runtime": 9.8475, | |
| "eval_samples_per_second": 20.31, | |
| "eval_steps_per_second": 20.31, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.0042735042735043, | |
| "grad_norm": 0.0341796875, | |
| "learning_rate": 1.3011078399028605e-05, | |
| "logits/chosen": -1.430901288986206, | |
| "logits/rejected": -0.9248638153076172, | |
| "logps/chosen": -60.4477424621582, | |
| "logps/rejected": -265.5071716308594, | |
| "loss": 0.0036, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.0871260166168213, | |
| "rewards/margins": 16.610048294067383, | |
| "rewards/rejected": -18.697174072265625, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 0.058349609375, | |
| "learning_rate": 1.2900803911931431e-05, | |
| "logits/chosen": -1.4457504749298096, | |
| "logits/rejected": -0.9034906625747681, | |
| "logps/chosen": -59.68601608276367, | |
| "logps/rejected": -263.2565612792969, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9931907653808594, | |
| "rewards/margins": 16.479150772094727, | |
| "rewards/rejected": -18.472341537475586, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.047008547008547, | |
| "grad_norm": 0.037841796875, | |
| "learning_rate": 1.2788699396321252e-05, | |
| "logits/chosen": -1.39047110080719, | |
| "logits/rejected": -0.8637332916259766, | |
| "logps/chosen": -57.831581115722656, | |
| "logps/rejected": -255.6790771484375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.925370454788208, | |
| "rewards/margins": 16.076345443725586, | |
| "rewards/rejected": -18.00171661376953, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.0683760683760684, | |
| "grad_norm": 0.0002689361572265625, | |
| "learning_rate": 1.2674827023665853e-05, | |
| "logits/chosen": -1.4293615818023682, | |
| "logits/rejected": -0.9234841465950012, | |
| "logps/chosen": -61.868263244628906, | |
| "logps/rejected": -270.99920654296875, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.230020046234131, | |
| "rewards/margins": 17.106487274169922, | |
| "rewards/rejected": -19.336505889892578, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.0897435897435896, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 1.255924994585978e-05, | |
| "logits/chosen": -1.4079844951629639, | |
| "logits/rejected": -0.8683494329452515, | |
| "logps/chosen": -63.99333953857422, | |
| "logps/rejected": -266.74462890625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.376952886581421, | |
| "rewards/margins": 16.51467514038086, | |
| "rewards/rejected": -18.89162826538086, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.1111111111111112, | |
| "grad_norm": 0.00147247314453125, | |
| "learning_rate": 1.2442032260201255e-05, | |
| "logits/chosen": -1.400887131690979, | |
| "logits/rejected": -0.890865683555603, | |
| "logps/chosen": -62.627723693847656, | |
| "logps/rejected": -263.9581604003906, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.308192729949951, | |
| "rewards/margins": 16.26297950744629, | |
| "rewards/rejected": -18.571170806884766, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.1324786324786325, | |
| "grad_norm": 0.1982421875, | |
| "learning_rate": 1.2323238973844796e-05, | |
| "logits/chosen": -1.438955545425415, | |
| "logits/rejected": -0.9066799283027649, | |
| "logps/chosen": -63.32421875, | |
| "logps/rejected": -273.82049560546875, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.295706272125244, | |
| "rewards/margins": 17.04709243774414, | |
| "rewards/rejected": -19.342798233032227, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.011962890625, | |
| "learning_rate": 1.2202935967749212e-05, | |
| "logits/chosen": -1.3738555908203125, | |
| "logits/rejected": -0.8493305444717407, | |
| "logps/chosen": -67.02043151855469, | |
| "logps/rejected": -268.3296813964844, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.584259510040283, | |
| "rewards/margins": 16.276723861694336, | |
| "rewards/rejected": -18.86098289489746, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.1752136752136753, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 1.2081189960141038e-05, | |
| "logits/chosen": -1.424109697341919, | |
| "logits/rejected": -0.8860370516777039, | |
| "logps/chosen": -61.854644775390625, | |
| "logps/rejected": -267.197021484375, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1506810188293457, | |
| "rewards/margins": 16.72945213317871, | |
| "rewards/rejected": -18.880136489868164, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.1965811965811965, | |
| "grad_norm": 0.185546875, | |
| "learning_rate": 1.1958068469513604e-05, | |
| "logits/chosen": -1.4285290241241455, | |
| "logits/rejected": -0.9055356979370117, | |
| "logps/chosen": -65.09664916992188, | |
| "logps/rejected": -277.3038635253906, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.459376096725464, | |
| "rewards/margins": 17.357646942138672, | |
| "rewards/rejected": -19.817026138305664, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.217948717948718, | |
| "grad_norm": 0.0059814453125, | |
| "learning_rate": 1.1833639777182316e-05, | |
| "logits/chosen": -1.306983232498169, | |
| "logits/rejected": -0.7688174247741699, | |
| "logps/chosen": -59.734840393066406, | |
| "logps/rejected": -272.2923889160156, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1238536834716797, | |
| "rewards/margins": 17.397279739379883, | |
| "rewards/rejected": -19.52113151550293, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.2393162393162394, | |
| "grad_norm": 0.004547119140625, | |
| "learning_rate": 1.170797288941685e-05, | |
| "logits/chosen": -1.2420094013214111, | |
| "logits/rejected": -0.7252348065376282, | |
| "logps/chosen": -66.8678207397461, | |
| "logps/rejected": -281.2005920410156, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6819348335266113, | |
| "rewards/margins": 17.403133392333984, | |
| "rewards/rejected": -20.085067749023438, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.2606837606837606, | |
| "grad_norm": 0.8828125, | |
| "learning_rate": 1.1581137499171342e-05, | |
| "logits/chosen": -1.2711966037750244, | |
| "logits/rejected": -0.7393882870674133, | |
| "logps/chosen": -67.3682861328125, | |
| "logps/rejected": -272.0533142089844, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7543585300445557, | |
| "rewards/margins": 16.66292953491211, | |
| "rewards/rejected": -19.41728973388672, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.01080322265625, | |
| "learning_rate": 1.145320394743371e-05, | |
| "logits/chosen": -1.2599390745162964, | |
| "logits/rejected": -0.7466105222702026, | |
| "logps/chosen": -65.01288604736328, | |
| "logps/rejected": -266.981689453125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5523085594177246, | |
| "rewards/margins": 16.560453414916992, | |
| "rewards/rejected": -19.112764358520508, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.3034188034188035, | |
| "grad_norm": 0.005706787109375, | |
| "learning_rate": 1.1324243184215622e-05, | |
| "logits/chosen": -1.268808364868164, | |
| "logits/rejected": -0.7630107998847961, | |
| "logps/chosen": -67.40531921386719, | |
| "logps/rejected": -282.67205810546875, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.892889976501465, | |
| "rewards/margins": 17.570592880249023, | |
| "rewards/rejected": -20.463483810424805, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.3247863247863247, | |
| "grad_norm": 0.00238037109375, | |
| "learning_rate": 1.1194326729204686e-05, | |
| "logits/chosen": -1.2974001169204712, | |
| "logits/rejected": -0.7796735167503357, | |
| "logps/chosen": -67.35159301757812, | |
| "logps/rejected": -279.50341796875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.8483831882476807, | |
| "rewards/margins": 17.4374942779541, | |
| "rewards/rejected": -20.285879135131836, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.3461538461538463, | |
| "grad_norm": 0.021484375, | |
| "learning_rate": 1.1063526632100717e-05, | |
| "logits/chosen": -1.2823902368545532, | |
| "logits/rejected": -0.8006687164306641, | |
| "logps/chosen": -71.19620513916016, | |
| "logps/rejected": -273.39569091796875, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.098231077194214, | |
| "rewards/margins": 16.550390243530273, | |
| "rewards/rejected": -19.648624420166016, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.3675213675213675, | |
| "grad_norm": 0.022216796875, | |
| "learning_rate": 1.0931915432658055e-05, | |
| "logits/chosen": -1.2740647792816162, | |
| "logits/rejected": -0.7717633843421936, | |
| "logps/chosen": -68.88563537597656, | |
| "logps/rejected": -277.2671813964844, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.923701763153076, | |
| "rewards/margins": 17.007097244262695, | |
| "rewards/rejected": -19.930797576904297, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 0.048095703125, | |
| "learning_rate": 1.0799566120456133e-05, | |
| "logits/chosen": -1.2586907148361206, | |
| "logits/rejected": -0.7510126829147339, | |
| "logps/chosen": -71.87751770019531, | |
| "logps/rejected": -275.9109191894531, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1944289207458496, | |
| "rewards/margins": 16.62671661376953, | |
| "rewards/rejected": -19.82114601135254, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.0052490234375, | |
| "learning_rate": 1.066655209442054e-05, | |
| "logits/chosen": -1.280989646911621, | |
| "logits/rejected": -0.772638201713562, | |
| "logps/chosen": -71.73796081542969, | |
| "logps/rejected": -279.8167419433594, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.172919511795044, | |
| "rewards/margins": 16.89200210571289, | |
| "rewards/rejected": -20.064922332763672, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.4316239316239316, | |
| "grad_norm": 0.007720947265625, | |
| "learning_rate": 1.0532947122117101e-05, | |
| "logits/chosen": -1.27340567111969, | |
| "logits/rejected": -0.7604951858520508, | |
| "logps/chosen": -69.45366668701172, | |
| "logps/rejected": -278.88409423828125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9620535373687744, | |
| "rewards/margins": 17.169063568115234, | |
| "rewards/rejected": -20.13111686706543, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.452991452991453, | |
| "grad_norm": 0.046630859375, | |
| "learning_rate": 1.0398825298841499e-05, | |
| "logits/chosen": -1.2809860706329346, | |
| "logits/rejected": -0.838448166847229, | |
| "logps/chosen": -72.61773681640625, | |
| "logps/rejected": -281.2303466796875, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1478874683380127, | |
| "rewards/margins": 17.092044830322266, | |
| "rewards/rejected": -20.239933013916016, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.4743589743589745, | |
| "grad_norm": 0.01025390625, | |
| "learning_rate": 1.0264261006527144e-05, | |
| "logits/chosen": -1.301695704460144, | |
| "logits/rejected": -0.8668543100357056, | |
| "logps/chosen": -69.15229797363281, | |
| "logps/rejected": -270.77130126953125, | |
| "loss": 0.0035, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -2.9070522785186768, | |
| "rewards/margins": 16.549983978271484, | |
| "rewards/rejected": -19.457035064697266, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.4957264957264957, | |
| "grad_norm": 0.5625, | |
| "learning_rate": 1.0129328872494075e-05, | |
| "logits/chosen": -1.385507345199585, | |
| "logits/rejected": -0.9304911494255066, | |
| "logps/chosen": -70.08064270019531, | |
| "logps/rejected": -279.90045166015625, | |
| "loss": 0.003, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.0408215522766113, | |
| "rewards/margins": 17.223588943481445, | |
| "rewards/rejected": -20.2644100189209, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.517094017094017, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 9.994103728061786e-06, | |
| "logits/chosen": -1.3539297580718994, | |
| "logits/rejected": -0.8995206952095032, | |
| "logps/chosen": -71.8431625366211, | |
| "logps/rejected": -278.0020446777344, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1649577617645264, | |
| "rewards/margins": 16.83130645751953, | |
| "rewards/rejected": -19.99626350402832, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.006134033203125, | |
| "learning_rate": 9.858660567048902e-06, | |
| "logits/chosen": -1.3628873825073242, | |
| "logits/rejected": -0.8617550730705261, | |
| "logps/chosen": -73.44374084472656, | |
| "logps/rejected": -291.49237060546875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.3345115184783936, | |
| "rewards/margins": 17.942880630493164, | |
| "rewards/rejected": -21.277393341064453, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.5598290598290598, | |
| "grad_norm": 0.036376953125, | |
| "learning_rate": 9.72307450418274e-06, | |
| "logits/chosen": -1.3687984943389893, | |
| "logits/rejected": -0.8952552676200867, | |
| "logps/chosen": -68.8270492553711, | |
| "logps/rejected": -276.6328125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.877126932144165, | |
| "rewards/margins": 17.08051872253418, | |
| "rewards/rejected": -19.957645416259766, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.5811965811965814, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 9.587420733441835e-06, | |
| "logits/chosen": -1.3641754388809204, | |
| "logits/rejected": -0.9082571864128113, | |
| "logps/chosen": -67.42304992675781, | |
| "logps/rejected": -281.8648986816406, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7612674236297607, | |
| "rewards/margins": 17.61087989807129, | |
| "rewards/rejected": -20.372146606445312, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.6025641025641026, | |
| "grad_norm": 0.000423431396484375, | |
| "learning_rate": 9.45177448635447e-06, | |
| "logits/chosen": -1.3942601680755615, | |
| "logits/rejected": -0.8580430746078491, | |
| "logps/chosen": -67.60791015625, | |
| "logps/rejected": -284.3832092285156, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.805607795715332, | |
| "rewards/margins": 18.005619049072266, | |
| "rewards/rejected": -20.811227798461914, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.623931623931624, | |
| "grad_norm": 0.0255126953125, | |
| "learning_rate": 9.316210990276434e-06, | |
| "logits/chosen": -1.3189040422439575, | |
| "logits/rejected": -0.8662185668945312, | |
| "logps/chosen": -67.1756362915039, | |
| "logps/rejected": -272.69500732421875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.7223994731903076, | |
| "rewards/margins": 16.855377197265625, | |
| "rewards/rejected": -19.577777862548828, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.6452991452991452, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 9.18080542667105e-06, | |
| "logits/chosen": -1.3573819398880005, | |
| "logits/rejected": -0.8372514843940735, | |
| "logps/chosen": -68.33662414550781, | |
| "logps/rejected": -291.3061218261719, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.794102191925049, | |
| "rewards/margins": 18.328411102294922, | |
| "rewards/rejected": -21.122512817382812, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 0.000885009765625, | |
| "learning_rate": 9.045632889414686e-06, | |
| "logits/chosen": -1.345085859298706, | |
| "logits/rejected": -0.8467508554458618, | |
| "logps/chosen": -66.54302978515625, | |
| "logps/rejected": -283.5255126953125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.698514461517334, | |
| "rewards/margins": 17.84862518310547, | |
| "rewards/rejected": -20.54714012145996, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.688034188034188, | |
| "grad_norm": 0.02734375, | |
| "learning_rate": 8.910768343150828e-06, | |
| "logits/chosen": -1.3259168863296509, | |
| "logits/rejected": -0.855597198009491, | |
| "logps/chosen": -69.09947204589844, | |
| "logps/rejected": -284.14801025390625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0127596855163574, | |
| "rewards/margins": 17.691822052001953, | |
| "rewards/rejected": -20.704580307006836, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 0.0032806396484375, | |
| "learning_rate": 8.77628658171581e-06, | |
| "logits/chosen": -1.3521082401275635, | |
| "logits/rejected": -0.896456241607666, | |
| "logps/chosen": -67.49749755859375, | |
| "logps/rejected": -278.388916015625, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.764543056488037, | |
| "rewards/margins": 17.227275848388672, | |
| "rewards/rejected": -19.991817474365234, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.7307692307692308, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 8.642262186659298e-06, | |
| "logits/chosen": -1.311095952987671, | |
| "logits/rejected": -0.8420788049697876, | |
| "logps/chosen": -68.78193664550781, | |
| "logps/rejected": -280.7834777832031, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.862732172012329, | |
| "rewards/margins": 17.331756591796875, | |
| "rewards/rejected": -20.194486618041992, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.7521367521367521, | |
| "grad_norm": 0.00640869140625, | |
| "learning_rate": 8.508769485882487e-06, | |
| "logits/chosen": -1.3232362270355225, | |
| "logits/rejected": -0.8540644645690918, | |
| "logps/chosen": -74.96504974365234, | |
| "logps/rejected": -283.9762268066406, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.3426380157470703, | |
| "rewards/margins": 17.237430572509766, | |
| "rewards/rejected": -20.580068588256836, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.7735042735042734, | |
| "grad_norm": 0.91796875, | |
| "learning_rate": 8.375882512416969e-06, | |
| "logits/chosen": -1.2893245220184326, | |
| "logits/rejected": -0.7882084846496582, | |
| "logps/chosen": -72.9918212890625, | |
| "logps/rejected": -286.07623291015625, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.248950481414795, | |
| "rewards/margins": 17.547632217407227, | |
| "rewards/rejected": -20.796581268310547, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.019287109375, | |
| "learning_rate": 8.243674963367137e-06, | |
| "logits/chosen": -1.3166277408599854, | |
| "logits/rejected": -0.8026930093765259, | |
| "logps/chosen": -76.18501281738281, | |
| "logps/rejected": -281.90447998046875, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.600315570831299, | |
| "rewards/margins": 16.638935089111328, | |
| "rewards/rejected": -20.239248275756836, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.8162393162393162, | |
| "grad_norm": 0.0012359619140625, | |
| "learning_rate": 8.11222015903888e-06, | |
| "logits/chosen": -1.3423035144805908, | |
| "logits/rejected": -0.8135835528373718, | |
| "logps/chosen": -78.0848159790039, | |
| "logps/rejected": -287.83135986328125, | |
| "loss": 0.0047, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.862813949584961, | |
| "rewards/margins": 17.16622543334961, | |
| "rewards/rejected": -21.029037475585938, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.8376068376068377, | |
| "grad_norm": 0.037353515625, | |
| "learning_rate": 7.981591002277265e-06, | |
| "logits/chosen": -1.3140472173690796, | |
| "logits/rejected": -0.8106688261032104, | |
| "logps/chosen": -77.36860656738281, | |
| "logps/rejected": -281.99664306640625, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.733330488204956, | |
| "rewards/margins": 16.830041885375977, | |
| "rewards/rejected": -20.563371658325195, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.858974358974359, | |
| "grad_norm": 0.03515625, | |
| "learning_rate": 7.851859938035712e-06, | |
| "logits/chosen": -1.304713487625122, | |
| "logits/rejected": -0.7914744019508362, | |
| "logps/chosen": -78.41984558105469, | |
| "logps/rejected": -291.4969177246094, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.7893600463867188, | |
| "rewards/margins": 17.505069732666016, | |
| "rewards/rejected": -21.294429779052734, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.8803418803418803, | |
| "grad_norm": 0.03271484375, | |
| "learning_rate": 7.723098913199118e-06, | |
| "logits/chosen": -1.3396222591400146, | |
| "logits/rejected": -0.834884524345398, | |
| "logps/chosen": -75.61878967285156, | |
| "logps/rejected": -278.4013366699219, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.660414218902588, | |
| "rewards/margins": 16.659198760986328, | |
| "rewards/rejected": -20.31961441040039, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.9017094017094016, | |
| "grad_norm": 0.015625, | |
| "learning_rate": 7.595379336683204e-06, | |
| "logits/chosen": -1.3091070652008057, | |
| "logits/rejected": -0.7569972276687622, | |
| "logps/chosen": -72.38371276855469, | |
| "logps/rejected": -287.745849609375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.400171995162964, | |
| "rewards/margins": 17.77309226989746, | |
| "rewards/rejected": -21.17326545715332, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.004638671875, | |
| "learning_rate": 7.468772039832218e-06, | |
| "logits/chosen": -1.2781813144683838, | |
| "logits/rejected": -0.7406023740768433, | |
| "logps/chosen": -68.62843322753906, | |
| "logps/rejected": -279.81103515625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0305323600769043, | |
| "rewards/margins": 17.346649169921875, | |
| "rewards/rejected": -20.37718391418457, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.9444444444444444, | |
| "grad_norm": 0.0026092529296875, | |
| "learning_rate": 7.3433472371369404e-06, | |
| "logits/chosen": -1.349867820739746, | |
| "logits/rejected": -0.8363698720932007, | |
| "logps/chosen": -74.56883239746094, | |
| "logps/rejected": -285.27227783203125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.506099224090576, | |
| "rewards/margins": 17.23251724243164, | |
| "rewards/rejected": -20.738616943359375, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.965811965811966, | |
| "grad_norm": 0.00165557861328125, | |
| "learning_rate": 7.219174487294784e-06, | |
| "logits/chosen": -1.3465303182601929, | |
| "logits/rejected": -0.8472278714179993, | |
| "logps/chosen": -71.95396423339844, | |
| "logps/rejected": -288.6927795410156, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.223040819168091, | |
| "rewards/margins": 17.83417510986328, | |
| "rewards/rejected": -21.05721664428711, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.9871794871794872, | |
| "grad_norm": 0.003509521484375, | |
| "learning_rate": 7.0963226546336e-06, | |
| "logits/chosen": -1.3585379123687744, | |
| "logits/rejected": -0.8536975979804993, | |
| "logps/chosen": -72.73930358886719, | |
| "logps/rejected": -283.37579345703125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.333233594894409, | |
| "rewards/margins": 17.287670135498047, | |
| "rewards/rejected": -20.62090492248535, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.9914529914529915, | |
| "eval_logits/chosen": -1.3740124702453613, | |
| "eval_logits/rejected": -0.8455994129180908, | |
| "eval_logps/chosen": -73.04570007324219, | |
| "eval_logps/rejected": -284.90460205078125, | |
| "eval_loss": 0.00021937819838058203, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -3.360196590423584, | |
| "eval_rewards/margins": 17.328575134277344, | |
| "eval_rewards/rejected": -20.688772201538086, | |
| "eval_runtime": 9.5669, | |
| "eval_samples_per_second": 20.905, | |
| "eval_steps_per_second": 20.905, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.0085470085470085, | |
| "grad_norm": 0.038818359375, | |
| "learning_rate": 6.974859870920561e-06, | |
| "logits/chosen": -1.2795295715332031, | |
| "logits/rejected": -0.8111523389816284, | |
| "logps/chosen": -75.71898651123047, | |
| "logps/rejected": -279.89599609375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6148743629455566, | |
| "rewards/margins": 16.733488082885742, | |
| "rewards/rejected": -20.348360061645508, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.02991452991453, | |
| "grad_norm": 0.006195068359375, | |
| "learning_rate": 6.8548534975773135e-06, | |
| "logits/chosen": -1.3317922353744507, | |
| "logits/rejected": -0.8281890153884888, | |
| "logps/chosen": -75.41677856445312, | |
| "logps/rejected": -286.36761474609375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4711709022521973, | |
| "rewards/margins": 17.307971954345703, | |
| "rewards/rejected": -20.779144287109375, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 6.736370088322359e-06, | |
| "logits/chosen": -1.3174855709075928, | |
| "logits/rejected": -0.7978845238685608, | |
| "logps/chosen": -74.10897064208984, | |
| "logps/rejected": -283.9443664550781, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4749629497528076, | |
| "rewards/margins": 17.323604583740234, | |
| "rewards/rejected": -20.798566818237305, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.072649572649573, | |
| "grad_norm": 0.007293701171875, | |
| "learning_rate": 6.619475352261356e-06, | |
| "logits/chosen": -1.3000952005386353, | |
| "logits/rejected": -0.8089855909347534, | |
| "logps/chosen": -78.87946319580078, | |
| "logps/rejected": -287.0957336425781, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6867222785949707, | |
| "rewards/margins": 17.182767868041992, | |
| "rewards/rejected": -20.869489669799805, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.094017094017094, | |
| "grad_norm": 0.002105712890625, | |
| "learning_rate": 6.504234117445857e-06, | |
| "logits/chosen": -1.3139859437942505, | |
| "logits/rejected": -0.8154487609863281, | |
| "logps/chosen": -74.31788635253906, | |
| "logps/rejected": -284.235107421875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.5273959636688232, | |
| "rewards/margins": 17.246944427490234, | |
| "rewards/rejected": -20.774341583251953, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.1153846153846154, | |
| "grad_norm": 0.0240478515625, | |
| "learning_rate": 6.39071029492065e-06, | |
| "logits/chosen": -1.2831732034683228, | |
| "logits/rejected": -0.7532753348350525, | |
| "logps/chosen": -73.73322296142578, | |
| "logps/rejected": -282.9715270996094, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.508289337158203, | |
| "rewards/margins": 17.217966079711914, | |
| "rewards/rejected": -20.726253509521484, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 6.2789668432796535e-06, | |
| "logits/chosen": -1.2966052293777466, | |
| "logits/rejected": -0.8182178735733032, | |
| "logps/chosen": -75.21055603027344, | |
| "logps/rejected": -284.99566650390625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6024773120880127, | |
| "rewards/margins": 17.332172393798828, | |
| "rewards/rejected": -20.934650421142578, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "eval_logits/chosen": -1.3704440593719482, | |
| "eval_logits/rejected": -0.8410933017730713, | |
| "eval_logps/chosen": -73.29509735107422, | |
| "eval_logps/rejected": -285.1749572753906, | |
| "eval_loss": 0.00022948597325012088, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -3.385136365890503, | |
| "eval_rewards/margins": 17.330673217773438, | |
| "eval_rewards/rejected": -20.715810775756836, | |
| "eval_runtime": 9.5382, | |
| "eval_samples_per_second": 20.968, | |
| "eval_steps_per_second": 20.968, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.158119658119658, | |
| "grad_norm": 0.009033203125, | |
| "learning_rate": 6.16906573375004e-06, | |
| "logits/chosen": -1.3252205848693848, | |
| "logits/rejected": -0.8390571475028992, | |
| "logps/chosen": -74.536376953125, | |
| "logps/rejected": -282.44195556640625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4960930347442627, | |
| "rewards/margins": 17.042308807373047, | |
| "rewards/rejected": -20.538402557373047, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.1794871794871793, | |
| "grad_norm": 0.00933837890625, | |
| "learning_rate": 6.061067915823923e-06, | |
| "logits/chosen": -1.2685729265213013, | |
| "logits/rejected": -0.7679704427719116, | |
| "logps/chosen": -72.36498260498047, | |
| "logps/rejected": -282.66351318359375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.318829298019409, | |
| "rewards/margins": 17.183773040771484, | |
| "rewards/rejected": -20.50260353088379, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.200854700854701, | |
| "grad_norm": 0.0196533203125, | |
| "learning_rate": 5.955033283456711e-06, | |
| "logits/chosen": -1.2974504232406616, | |
| "logits/rejected": -0.7774112820625305, | |
| "logps/chosen": -78.15269470214844, | |
| "logps/rejected": -293.7340393066406, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.7473702430725098, | |
| "rewards/margins": 17.710281372070312, | |
| "rewards/rejected": -21.457651138305664, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 0.0023040771484375, | |
| "learning_rate": 5.8510206418507914e-06, | |
| "logits/chosen": -1.3559068441390991, | |
| "logits/rejected": -0.8591842651367188, | |
| "logps/chosen": -77.19640350341797, | |
| "logps/rejected": -300.4443359375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6723015308380127, | |
| "rewards/margins": 18.35979461669922, | |
| "rewards/rejected": -22.03209686279297, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.2435897435897436, | |
| "grad_norm": 0.01385498046875, | |
| "learning_rate": 5.749087674843095e-06, | |
| "logits/chosen": -1.2999016046524048, | |
| "logits/rejected": -0.8241308927536011, | |
| "logps/chosen": -70.76306915283203, | |
| "logps/rejected": -283.5411376953125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.232194423675537, | |
| "rewards/margins": 17.38874626159668, | |
| "rewards/rejected": -20.620941162109375, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.264957264957265, | |
| "grad_norm": 0.0264892578125, | |
| "learning_rate": 5.649290912914482e-06, | |
| "logits/chosen": -1.3043696880340576, | |
| "logits/rejected": -0.8295344114303589, | |
| "logps/chosen": -79.15299224853516, | |
| "logps/rejected": -298.5386962890625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.7782886028289795, | |
| "rewards/margins": 17.993247985839844, | |
| "rewards/rejected": -21.771535873413086, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.286324786324786, | |
| "grad_norm": 0.04931640625, | |
| "learning_rate": 5.5516857018388144e-06, | |
| "logits/chosen": -1.355273962020874, | |
| "logits/rejected": -0.8746377229690552, | |
| "logps/chosen": -74.15048217773438, | |
| "logps/rejected": -281.2100524902344, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4310333728790283, | |
| "rewards/margins": 16.96903419494629, | |
| "rewards/rejected": -20.400066375732422, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 0.01397705078125, | |
| "learning_rate": 5.456326171989005e-06, | |
| "logits/chosen": -1.3123310804367065, | |
| "logits/rejected": -0.840388298034668, | |
| "logps/chosen": -71.68992614746094, | |
| "logps/rejected": -300.37091064453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.202878475189209, | |
| "rewards/margins": 18.644420623779297, | |
| "rewards/rejected": -21.84729766845703, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.3290598290598292, | |
| "grad_norm": 0.0027618408203125, | |
| "learning_rate": 5.363265208317156e-06, | |
| "logits/chosen": -1.2788275480270386, | |
| "logits/rejected": -0.8199743032455444, | |
| "logps/chosen": -73.48957824707031, | |
| "logps/rejected": -281.1923828125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.415186643600464, | |
| "rewards/margins": 17.106266021728516, | |
| "rewards/rejected": -20.52145004272461, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.3504273504273505, | |
| "grad_norm": 0.01055908203125, | |
| "learning_rate": 5.272554421025347e-06, | |
| "logits/chosen": -1.3188756704330444, | |
| "logits/rejected": -0.8151782751083374, | |
| "logps/chosen": -74.64764404296875, | |
| "logps/rejected": -291.92108154296875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.5020947456359863, | |
| "rewards/margins": 17.861783981323242, | |
| "rewards/rejected": -21.36387825012207, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.371794871794872, | |
| "grad_norm": 0.05078125, | |
| "learning_rate": 5.184244116943411e-06, | |
| "logits/chosen": -1.3126680850982666, | |
| "logits/rejected": -0.8074380159378052, | |
| "logps/chosen": -73.89201354980469, | |
| "logps/rejected": -285.258056640625, | |
| "loss": 0.003, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.3727481365203857, | |
| "rewards/margins": 17.37813377380371, | |
| "rewards/rejected": -20.75088119506836, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.393162393162393, | |
| "grad_norm": 0.00604248046875, | |
| "learning_rate": 5.098383271629512e-06, | |
| "logits/chosen": -1.3314543962478638, | |
| "logits/rejected": -0.8163145184516907, | |
| "logps/chosen": -73.50102233886719, | |
| "logps/rejected": -279.52532958984375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.414262294769287, | |
| "rewards/margins": 16.943660736083984, | |
| "rewards/rejected": -20.357921600341797, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.4145299145299144, | |
| "grad_norm": 0.00311279296875, | |
| "learning_rate": 5.015019502209056e-06, | |
| "logits/chosen": -1.3196806907653809, | |
| "logits/rejected": -0.8105131387710571, | |
| "logps/chosen": -72.18685913085938, | |
| "logps/rejected": -275.95123291015625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.361246109008789, | |
| "rewards/margins": 16.643529891967773, | |
| "rewards/rejected": -20.004776000976562, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.435897435897436, | |
| "grad_norm": 0.000823974609375, | |
| "learning_rate": 4.934199040966955e-06, | |
| "logits/chosen": -1.3401740789413452, | |
| "logits/rejected": -0.8449984788894653, | |
| "logps/chosen": -73.99894714355469, | |
| "logps/rejected": -279.31915283203125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.482757568359375, | |
| "rewards/margins": 16.8636531829834, | |
| "rewards/rejected": -20.346412658691406, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.4572649572649574, | |
| "grad_norm": 0.03271484375, | |
| "learning_rate": 4.855966709707881e-06, | |
| "logits/chosen": -1.308977484703064, | |
| "logits/rejected": -0.8370776176452637, | |
| "logps/chosen": -77.53469848632812, | |
| "logps/rejected": -285.58367919921875, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.7107481956481934, | |
| "rewards/margins": 17.121841430664062, | |
| "rewards/rejected": -20.832592010498047, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.4786324786324787, | |
| "grad_norm": 0.0299072265625, | |
| "learning_rate": 4.780365894898799e-06, | |
| "logits/chosen": -1.3271667957305908, | |
| "logits/rejected": -0.8259018063545227, | |
| "logps/chosen": -74.68269348144531, | |
| "logps/rejected": -287.12078857421875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4662697315216064, | |
| "rewards/margins": 17.42196273803711, | |
| "rewards/rejected": -20.888233184814453, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.00921630859375, | |
| "learning_rate": 4.7074385236074684e-06, | |
| "logits/chosen": -1.3541457653045654, | |
| "logits/rejected": -0.8319869041442871, | |
| "logps/chosen": -78.34286499023438, | |
| "logps/rejected": -294.329833984375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.6850643157958984, | |
| "rewards/margins": 17.795787811279297, | |
| "rewards/rejected": -21.480854034423828, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 2.5213675213675213, | |
| "grad_norm": 0.052001953125, | |
| "learning_rate": 4.63722504025034e-06, | |
| "logits/chosen": -1.3320066928863525, | |
| "logits/rejected": -0.8415569067001343, | |
| "logps/chosen": -71.98558044433594, | |
| "logps/rejected": -284.99346923828125, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.249178647994995, | |
| "rewards/margins": 17.486406326293945, | |
| "rewards/rejected": -20.735586166381836, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.5427350427350426, | |
| "grad_norm": 0.01025390625, | |
| "learning_rate": 4.569764384162676e-06, | |
| "logits/chosen": -1.3463108539581299, | |
| "logits/rejected": -0.8353781700134277, | |
| "logps/chosen": -68.05410766601562, | |
| "logps/rejected": -284.91461181640625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.9681944847106934, | |
| "rewards/margins": 17.779176712036133, | |
| "rewards/rejected": -20.74736976623535, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.01422119140625, | |
| "learning_rate": 4.50509396800341e-06, | |
| "logits/chosen": -1.2894313335418701, | |
| "logits/rejected": -0.7797183990478516, | |
| "logps/chosen": -72.90419006347656, | |
| "logps/rejected": -285.7866516113281, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.3224644660949707, | |
| "rewards/margins": 17.589801788330078, | |
| "rewards/rejected": -20.91226577758789, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.5854700854700856, | |
| "grad_norm": 0.0247802734375, | |
| "learning_rate": 4.443249657006627e-06, | |
| "logits/chosen": -1.2982523441314697, | |
| "logits/rejected": -0.7844825983047485, | |
| "logps/chosen": -70.4168472290039, | |
| "logps/rejected": -290.3528747558594, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 0.9958333969116211, | |
| "rewards/chosen": -3.1497464179992676, | |
| "rewards/margins": 18.14370346069336, | |
| "rewards/rejected": -21.293447494506836, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.606837606837607, | |
| "grad_norm": 0.0164794921875, | |
| "learning_rate": 4.384265749091266e-06, | |
| "logits/chosen": -1.2762781381607056, | |
| "logits/rejected": -0.7862453460693359, | |
| "logps/chosen": -78.71661376953125, | |
| "logps/rejected": -288.5256042480469, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.7849228382110596, | |
| "rewards/margins": 17.296167373657227, | |
| "rewards/rejected": -21.081090927124023, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.628205128205128, | |
| "grad_norm": 0.00482177734375, | |
| "learning_rate": 4.328174955840002e-06, | |
| "logits/chosen": -1.2989494800567627, | |
| "logits/rejected": -0.791740357875824, | |
| "logps/chosen": -68.19273376464844, | |
| "logps/rejected": -283.67010498046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0757412910461426, | |
| "rewards/margins": 17.688796997070312, | |
| "rewards/rejected": -20.764535903930664, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.6495726495726495, | |
| "grad_norm": 0.0026092529296875, | |
| "learning_rate": 4.275008384357902e-06, | |
| "logits/chosen": -1.3389320373535156, | |
| "logits/rejected": -0.8387205004692078, | |
| "logps/chosen": -72.53665924072266, | |
| "logps/rejected": -283.5371398925781, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.2912750244140625, | |
| "rewards/margins": 17.413835525512695, | |
| "rewards/rejected": -20.705108642578125, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.6709401709401708, | |
| "grad_norm": 0.0028533935546875, | |
| "learning_rate": 4.224795520020898e-06, | |
| "logits/chosen": -1.2840917110443115, | |
| "logits/rejected": -0.7634187936782837, | |
| "logps/chosen": -75.63284301757812, | |
| "logps/rejected": -282.2013244628906, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4395980834960938, | |
| "rewards/margins": 16.99862289428711, | |
| "rewards/rejected": -20.438220977783203, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.6923076923076925, | |
| "grad_norm": 0.01446533203125, | |
| "learning_rate": 4.177564210123634e-06, | |
| "logits/chosen": -1.32615327835083, | |
| "logits/rejected": -0.8317953944206238, | |
| "logps/chosen": -72.77333068847656, | |
| "logps/rejected": -291.9281921386719, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.310988664627075, | |
| "rewards/margins": 17.982282638549805, | |
| "rewards/rejected": -21.293270111083984, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.713675213675214, | |
| "grad_norm": 0.00921630859375, | |
| "learning_rate": 4.133340648435789e-06, | |
| "logits/chosen": -1.3119795322418213, | |
| "logits/rejected": -0.7846705913543701, | |
| "logps/chosen": -74.24435424804688, | |
| "logps/rejected": -289.03887939453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4849464893341064, | |
| "rewards/margins": 17.662567138671875, | |
| "rewards/rejected": -21.147512435913086, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.735042735042735, | |
| "grad_norm": 0.005828857421875, | |
| "learning_rate": 4.092149360675402e-06, | |
| "logits/chosen": -1.2881155014038086, | |
| "logits/rejected": -0.8033782243728638, | |
| "logps/chosen": -79.22930908203125, | |
| "logps/rejected": -294.1476745605469, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.7118358612060547, | |
| "rewards/margins": 17.629985809326172, | |
| "rewards/rejected": -21.341821670532227, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.7564102564102564, | |
| "grad_norm": 0.019775390625, | |
| "learning_rate": 4.054013190907282e-06, | |
| "logits/chosen": -1.2686903476715088, | |
| "logits/rejected": -0.7805891633033752, | |
| "logps/chosen": -70.22049713134766, | |
| "logps/rejected": -284.9203796386719, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.1786301136016846, | |
| "rewards/margins": 17.73773956298828, | |
| "rewards/rejected": -20.916370391845703, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 0.004241943359375, | |
| "learning_rate": 4.018953288874035e-06, | |
| "logits/chosen": -1.3032779693603516, | |
| "logits/rejected": -0.8233755230903625, | |
| "logps/chosen": -74.59503936767578, | |
| "logps/rejected": -288.961669921875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.4693355560302734, | |
| "rewards/margins": 17.675161361694336, | |
| "rewards/rejected": -21.14449691772461, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.799145299145299, | |
| "grad_norm": 0.0040283203125, | |
| "learning_rate": 3.9869890982667385e-06, | |
| "logits/chosen": -1.310773491859436, | |
| "logits/rejected": -0.7524069547653198, | |
| "logps/chosen": -72.80381774902344, | |
| "logps/rejected": -290.0320739746094, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.345860004425049, | |
| "rewards/margins": 17.688016891479492, | |
| "rewards/rejected": -21.033876419067383, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.8205128205128203, | |
| "grad_norm": 0.0189208984375, | |
| "learning_rate": 3.9581383459417625e-06, | |
| "logits/chosen": -1.291512370109558, | |
| "logits/rejected": -0.790591299533844, | |
| "logps/chosen": -78.11724853515625, | |
| "logps/rejected": -297.84283447265625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.7806007862091064, | |
| "rewards/margins": 18.05144500732422, | |
| "rewards/rejected": -21.832046508789062, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.841880341880342, | |
| "grad_norm": 0.00494384765625, | |
| "learning_rate": 3.932417032089722e-06, | |
| "logits/chosen": -1.3292133808135986, | |
| "logits/rejected": -0.8189595937728882, | |
| "logps/chosen": -76.23522186279297, | |
| "logps/rejected": -293.2325744628906, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.59516978263855, | |
| "rewards/margins": 17.823274612426758, | |
| "rewards/rejected": -21.418445587158203, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.8632478632478633, | |
| "grad_norm": 0.0029754638671875, | |
| "learning_rate": 3.909839421362017e-06, | |
| "logits/chosen": -1.2779964208602905, | |
| "logits/rejected": -0.7794166803359985, | |
| "logps/chosen": -74.2072982788086, | |
| "logps/rejected": -289.36956787109375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.46783185005188, | |
| "rewards/margins": 17.554330825805664, | |
| "rewards/rejected": -21.02216148376465, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.8846153846153846, | |
| "grad_norm": 0.038818359375, | |
| "learning_rate": 3.890418034959871e-06, | |
| "logits/chosen": -1.2737759351730347, | |
| "logits/rejected": -0.7384223937988281, | |
| "logps/chosen": -73.76658630371094, | |
| "logps/rejected": -286.06195068359375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.443812847137451, | |
| "rewards/margins": 17.49923324584961, | |
| "rewards/rejected": -20.943044662475586, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.905982905982906, | |
| "grad_norm": 0.06884765625, | |
| "learning_rate": 3.874163643690263e-06, | |
| "logits/chosen": -1.255707025527954, | |
| "logits/rejected": -0.7339369654655457, | |
| "logps/chosen": -80.28028869628906, | |
| "logps/rejected": -291.90179443359375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.9126086235046387, | |
| "rewards/margins": 17.456329345703125, | |
| "rewards/rejected": -21.368938446044922, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.9273504273504276, | |
| "grad_norm": 0.04296875, | |
| "learning_rate": 3.861085261992599e-06, | |
| "logits/chosen": -1.306028127670288, | |
| "logits/rejected": -0.8490394353866577, | |
| "logps/chosen": -78.53582763671875, | |
| "logps/rejected": -289.38330078125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.830167770385742, | |
| "rewards/margins": 17.217695236206055, | |
| "rewards/rejected": -21.047863006591797, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.948717948717949, | |
| "grad_norm": 0.12255859375, | |
| "learning_rate": 3.851190142939442e-06, | |
| "logits/chosen": -1.2999136447906494, | |
| "logits/rejected": -0.8062965273857117, | |
| "logps/chosen": -72.80134582519531, | |
| "logps/rejected": -288.02532958984375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.2750275135040283, | |
| "rewards/margins": 17.672157287597656, | |
| "rewards/rejected": -20.94718360900879, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.97008547008547, | |
| "grad_norm": 0.001434326171875, | |
| "learning_rate": 3.844483774214069e-06, | |
| "logits/chosen": -1.2856634855270386, | |
| "logits/rejected": -0.7375695705413818, | |
| "logps/chosen": -72.40345764160156, | |
| "logps/rejected": -288.3551940917969, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.333986759185791, | |
| "rewards/margins": 17.785888671875, | |
| "rewards/rejected": -21.119874954223633, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.9871794871794872, | |
| "eval_logits/chosen": -1.3524901866912842, | |
| "eval_logits/rejected": -0.8189607262611389, | |
| "eval_logps/chosen": -73.21478271484375, | |
| "eval_logps/rejected": -285.69122314453125, | |
| "eval_loss": 0.00020370040147099644, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": -3.377105474472046, | |
| "eval_rewards/margins": 17.390329360961914, | |
| "eval_rewards/rejected": -20.767436981201172, | |
| "eval_runtime": 9.5425, | |
| "eval_samples_per_second": 20.959, | |
| "eval_steps_per_second": 20.959, | |
| "step": 699 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 702, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |