{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.981366459627329,
  "eval_steps": 50,
  "global_step": 120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12422360248447205,
      "grad_norm": 54.367663803058946,
      "learning_rate": 5e-07,
      "logits/chosen": -2.7148144245147705,
      "logits/rejected": -2.7243547439575195,
      "logps/chosen": -242.867431640625,
      "logps/rejected": -227.12136840820312,
      "loss": 0.691,
      "rewards/accuracies": 0.3125,
      "rewards/chosen": 0.009815122000873089,
      "rewards/margins": 0.005822173319756985,
      "rewards/rejected": 0.003992948215454817,
      "step": 5
    },
    {
      "epoch": 0.2484472049689441,
      "grad_norm": 51.804115964444165,
      "learning_rate": 1e-06,
      "logits/chosen": -2.6798529624938965,
      "logits/rejected": -2.703315258026123,
      "logps/chosen": -256.2458190917969,
      "logps/rejected": -217.85592651367188,
      "loss": 0.6456,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.43589210510253906,
      "rewards/margins": 0.08772359788417816,
      "rewards/rejected": 0.3481685519218445,
      "step": 10
    },
    {
      "epoch": 0.37267080745341613,
      "grad_norm": 50.53080123806113,
      "learning_rate": 9.949107209404663e-07,
      "logits/chosen": -2.5799756050109863,
      "logits/rejected": -2.565157651901245,
      "logps/chosen": -237.31692504882812,
      "logps/rejected": -208.06655883789062,
      "loss": 0.6378,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 1.4567431211471558,
      "rewards/margins": 0.7478972673416138,
      "rewards/rejected": 0.7088459730148315,
      "step": 15
    },
    {
      "epoch": 0.4968944099378882,
      "grad_norm": 54.57054056014394,
      "learning_rate": 9.797464868072486e-07,
      "logits/chosen": -2.4506874084472656,
      "logits/rejected": -2.433974027633667,
      "logps/chosen": -247.51657104492188,
      "logps/rejected": -216.2230987548828,
      "loss": 0.6587,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 1.5972602367401123,
      "rewards/margins": 1.0024363994598389,
      "rewards/rejected": 0.594823956489563,
      "step": 20
    },
    {
      "epoch": 0.6211180124223602,
      "grad_norm": 44.117531702150536,
      "learning_rate": 9.548159976772592e-07,
      "logits/chosen": -2.4157333374023438,
      "logits/rejected": -2.3935298919677734,
      "logps/chosen": -231.5720672607422,
      "logps/rejected": -216.5135498046875,
      "loss": 0.6622,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.7699011564254761,
      "rewards/margins": 0.8483353853225708,
      "rewards/rejected": -0.07843427360057831,
      "step": 25
    },
    {
      "epoch": 0.7453416149068323,
      "grad_norm": 46.22604593677178,
      "learning_rate": 9.206267664155906e-07,
      "logits/chosen": -2.4077823162078857,
      "logits/rejected": -2.4088187217712402,
      "logps/chosen": -260.6187744140625,
      "logps/rejected": -240.7838897705078,
      "loss": 0.6343,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.9798136949539185,
      "rewards/margins": 1.1557605266571045,
      "rewards/rejected": -0.17594675719738007,
      "step": 30
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 45.738597782002316,
      "learning_rate": 8.778747871771291e-07,
      "logits/chosen": -2.4890403747558594,
      "logits/rejected": -2.4890661239624023,
      "logps/chosen": -267.4264831542969,
      "logps/rejected": -236.0730743408203,
      "loss": 0.6245,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": 0.5028200745582581,
      "rewards/margins": 1.037217617034912,
      "rewards/rejected": -0.5343974232673645,
      "step": 35
    },
    {
      "epoch": 0.9937888198757764,
      "grad_norm": 40.58782325478915,
      "learning_rate": 8.274303669726426e-07,
      "logits/chosen": -2.464543104171753,
      "logits/rejected": -2.45270037651062,
      "logps/chosen": -246.43997192382812,
      "logps/rejected": -244.5944366455078,
      "loss": 0.6076,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": 0.423252671957016,
      "rewards/margins": 0.9229713678359985,
      "rewards/rejected": -0.49971866607666016,
      "step": 40
    },
    {
      "epoch": 1.1180124223602483,
      "grad_norm": 21.75862492001889,
      "learning_rate": 7.703204087277988e-07,
      "logits/chosen": -2.437509059906006,
      "logits/rejected": -2.4511005878448486,
      "logps/chosen": -238.031005859375,
      "logps/rejected": -239.1492462158203,
      "loss": 0.2467,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": 1.3256285190582275,
      "rewards/margins": 2.4453110694885254,
      "rewards/rejected": -1.1196826696395874,
      "step": 45
    },
    {
      "epoch": 1.2422360248447206,
      "grad_norm": 22.132976615768026,
      "learning_rate": 7.077075065009433e-07,
      "logits/chosen": -2.484419822692871,
      "logits/rejected": -2.485710859298706,
      "logps/chosen": -241.15200805664062,
      "logps/rejected": -220.26907348632812,
      "loss": 0.2364,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 1.7490062713623047,
      "rewards/margins": 2.5016419887542725,
      "rewards/rejected": -0.7526359558105469,
      "step": 50
    },
    {
      "epoch": 1.2422360248447206,
      "eval_logits/chosen": -2.5087192058563232,
      "eval_logits/rejected": -2.515753746032715,
      "eval_logps/chosen": -249.35264587402344,
      "eval_logps/rejected": -202.0917205810547,
      "eval_loss": 0.5729268789291382,
      "eval_rewards/accuracies": 0.7291666865348816,
      "eval_rewards/chosen": 1.2346218824386597,
      "eval_rewards/margins": 1.0999401807785034,
      "eval_rewards/rejected": 0.13468176126480103,
      "eval_runtime": 75.094,
      "eval_samples_per_second": 15.181,
      "eval_steps_per_second": 0.24,
      "step": 50
    },
    {
      "epoch": 1.3664596273291925,
      "grad_norm": 17.330600410265617,
      "learning_rate": 6.408662784207149e-07,
      "logits/chosen": -2.4883952140808105,
      "logits/rejected": -2.482597827911377,
      "logps/chosen": -236.4322052001953,
      "logps/rejected": -229.5203094482422,
      "loss": 0.2196,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 2.2716193199157715,
      "rewards/margins": 3.196570634841919,
      "rewards/rejected": -0.9249511957168579,
      "step": 55
    },
    {
      "epoch": 1.4906832298136645,
      "grad_norm": 17.716660362051734,
      "learning_rate": 5.711574191366427e-07,
      "logits/chosen": -2.468207836151123,
      "logits/rejected": -2.4725213050842285,
      "logps/chosen": -222.43896484375,
      "logps/rejected": -200.0598602294922,
      "loss": 0.2119,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 2.1822891235351562,
      "rewards/margins": 2.8318796157836914,
      "rewards/rejected": -0.6495904922485352,
      "step": 60
    },
    {
      "epoch": 1.6149068322981366,
      "grad_norm": 23.32765774015972,
      "learning_rate": 5e-07,
      "logits/chosen": -2.5094847679138184,
      "logits/rejected": -2.5222580432891846,
      "logps/chosen": -236.4397430419922,
      "logps/rejected": -216.79052734375,
      "loss": 0.2118,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 2.586951494216919,
      "rewards/margins": 3.453221559524536,
      "rewards/rejected": -0.8662700653076172,
      "step": 65
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 25.27024243839641,
      "learning_rate": 4.2884258086335745e-07,
      "logits/chosen": -2.506361484527588,
      "logits/rejected": -2.4916276931762695,
      "logps/chosen": -228.9758758544922,
      "logps/rejected": -225.41006469726562,
      "loss": 0.2294,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 2.325880527496338,
      "rewards/margins": 3.3348236083984375,
      "rewards/rejected": -1.0089433193206787,
      "step": 70
    },
    {
      "epoch": 1.8633540372670807,
      "grad_norm": 27.860922972380834,
      "learning_rate": 3.591337215792851e-07,
      "logits/chosen": -2.5197181701660156,
      "logits/rejected": -2.5090882778167725,
      "logps/chosen": -239.81277465820312,
      "logps/rejected": -230.70059204101562,
      "loss": 0.265,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": 2.431699275970459,
      "rewards/margins": 3.363804340362549,
      "rewards/rejected": -0.9321050643920898,
      "step": 75
    },
    {
      "epoch": 1.9875776397515528,
      "grad_norm": 17.61138833178944,
      "learning_rate": 2.922924934990568e-07,
      "logits/chosen": -2.543259382247925,
      "logits/rejected": -2.493022918701172,
      "logps/chosen": -237.87887573242188,
      "logps/rejected": -279.49261474609375,
      "loss": 0.2128,
      "rewards/accuracies": 0.918749988079071,
      "rewards/chosen": 2.389310836791992,
      "rewards/margins": 4.381407260894775,
      "rewards/rejected": -1.9920963048934937,
      "step": 80
    },
    {
      "epoch": 2.111801242236025,
      "grad_norm": 13.031340899683215,
      "learning_rate": 2.2967959127220137e-07,
      "logits/chosen": -2.5387518405914307,
      "logits/rejected": -2.5558295249938965,
      "logps/chosen": -225.1177520751953,
      "logps/rejected": -233.97073364257812,
      "loss": 0.1297,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": 2.17976713180542,
      "rewards/margins": 3.992032527923584,
      "rewards/rejected": -1.812265396118164,
      "step": 85
    },
    {
      "epoch": 2.2360248447204967,
      "grad_norm": 16.033450629688048,
      "learning_rate": 1.725696330273575e-07,
      "logits/chosen": -2.5489468574523926,
      "logits/rejected": -2.5377697944641113,
      "logps/chosen": -255.279296875,
      "logps/rejected": -245.3787078857422,
      "loss": 0.123,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 2.2218708992004395,
      "rewards/margins": 4.716561794281006,
      "rewards/rejected": -2.4946906566619873,
      "step": 90
    },
    {
      "epoch": 2.360248447204969,
      "grad_norm": 12.307639352242482,
      "learning_rate": 1.2212521282287093e-07,
      "logits/chosen": -2.5559678077697754,
      "logits/rejected": -2.5754735469818115,
      "logps/chosen": -240.5291748046875,
      "logps/rejected": -270.05230712890625,
      "loss": 0.1073,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 1.9212055206298828,
      "rewards/margins": 4.681941032409668,
      "rewards/rejected": -2.760735511779785,
      "step": 95
    },
    {
      "epoch": 2.4844720496894412,
      "grad_norm": 12.425204577942079,
      "learning_rate": 7.937323358440934e-08,
      "logits/chosen": -2.549752950668335,
      "logits/rejected": -2.5533714294433594,
      "logps/chosen": -235.24368286132812,
      "logps/rejected": -259.5509033203125,
      "loss": 0.1061,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 1.9440858364105225,
      "rewards/margins": 4.815189361572266,
      "rewards/rejected": -2.8711037635803223,
      "step": 100
    },
    {
      "epoch": 2.4844720496894412,
      "eval_logits/chosen": -2.55989933013916,
      "eval_logits/rejected": -2.5775303840637207,
      "eval_logps/chosen": -254.12814331054688,
      "eval_logps/rejected": -212.31497192382812,
      "eval_loss": 0.6158778071403503,
      "eval_rewards/accuracies": 0.7569444179534912,
      "eval_rewards/chosen": 0.7570738792419434,
      "eval_rewards/margins": 1.6447181701660156,
      "eval_rewards/rejected": -0.8876442313194275,
      "eval_runtime": 74.8651,
      "eval_samples_per_second": 15.227,
      "eval_steps_per_second": 0.24,
      "step": 100
    },
    {
      "epoch": 2.608695652173913,
      "grad_norm": 18.01907950225221,
      "learning_rate": 4.518400232274078e-08,
      "logits/chosen": -2.546436309814453,
      "logits/rejected": -2.5362162590026855,
      "logps/chosen": -227.8841552734375,
      "logps/rejected": -236.8609619140625,
      "loss": 0.1288,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": 1.770033836364746,
      "rewards/margins": 4.3705244064331055,
      "rewards/rejected": -2.6004908084869385,
      "step": 105
    },
    {
      "epoch": 2.732919254658385,
      "grad_norm": 17.707001673219747,
      "learning_rate": 2.025351319275137e-08,
      "logits/chosen": -2.5263776779174805,
      "logits/rejected": -2.5271897315979004,
      "logps/chosen": -243.36788940429688,
      "logps/rejected": -254.2205352783203,
      "loss": 0.1263,
      "rewards/accuracies": 0.956250011920929,
      "rewards/chosen": 1.7786777019500732,
      "rewards/margins": 4.2846503257751465,
      "rewards/rejected": -2.505972385406494,
      "step": 110
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 16.228515302877508,
      "learning_rate": 5.0892790595336575e-09,
      "logits/chosen": -2.5431525707244873,
      "logits/rejected": -2.5375916957855225,
      "logps/chosen": -234.5476531982422,
      "logps/rejected": -246.033447265625,
      "loss": 0.113,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 1.938812494277954,
      "rewards/margins": 4.491750240325928,
      "rewards/rejected": -2.552938461303711,
      "step": 115
    },
    {
      "epoch": 2.981366459627329,
      "grad_norm": 15.40676083530787,
      "learning_rate": 0.0,
      "logits/chosen": -2.529771089553833,
      "logits/rejected": -2.5455727577209473,
      "logps/chosen": -242.18539428710938,
      "logps/rejected": -240.2424774169922,
      "loss": 0.1225,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": 2.026702642440796,
      "rewards/margins": 4.205595970153809,
      "rewards/rejected": -2.178893566131592,
      "step": 120
    },
    {
      "epoch": 2.981366459627329,
      "step": 120,
      "total_flos": 1414680891359232.0,
      "train_loss": 0.3313312023878098,
      "train_runtime": 4261.9082,
      "train_samples_per_second": 7.22,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 5,
  "max_steps": 120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1414680891359232.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}