| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.4844720496894412, |
| "eval_steps": 50, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12422360248447205, |
| "grad_norm": 54.367663803058946, |
| "learning_rate": 5e-07, |
| "logits/chosen": -2.7148144245147705, |
| "logits/rejected": -2.7243547439575195, |
| "logps/chosen": -242.867431640625, |
| "logps/rejected": -227.12136840820312, |
| "loss": 0.691, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": 0.009815122000873089, |
| "rewards/margins": 0.005822173319756985, |
| "rewards/rejected": 0.003992948215454817, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.2484472049689441, |
| "grad_norm": 51.804115964444165, |
| "learning_rate": 1e-06, |
| "logits/chosen": -2.6798529624938965, |
| "logits/rejected": -2.703315258026123, |
| "logps/chosen": -256.2458190917969, |
| "logps/rejected": -217.85592651367188, |
| "loss": 0.6456, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.43589210510253906, |
| "rewards/margins": 0.08772359788417816, |
| "rewards/rejected": 0.3481685519218445, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.37267080745341613, |
| "grad_norm": 50.53080123806113, |
| "learning_rate": 9.949107209404663e-07, |
| "logits/chosen": -2.5799756050109863, |
| "logits/rejected": -2.565157651901245, |
| "logps/chosen": -237.31692504882812, |
| "logps/rejected": -208.06655883789062, |
| "loss": 0.6378, |
| "rewards/accuracies": 0.706250011920929, |
| "rewards/chosen": 1.4567431211471558, |
| "rewards/margins": 0.7478972673416138, |
| "rewards/rejected": 0.7088459730148315, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4968944099378882, |
| "grad_norm": 54.57054056014394, |
| "learning_rate": 9.797464868072486e-07, |
| "logits/chosen": -2.4506874084472656, |
| "logits/rejected": -2.433974027633667, |
| "logps/chosen": -247.51657104492188, |
| "logps/rejected": -216.2230987548828, |
| "loss": 0.6587, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": 1.5972602367401123, |
| "rewards/margins": 1.0024363994598389, |
| "rewards/rejected": 0.594823956489563, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6211180124223602, |
| "grad_norm": 44.117531702150536, |
| "learning_rate": 9.548159976772592e-07, |
| "logits/chosen": -2.4157333374023438, |
| "logits/rejected": -2.3935298919677734, |
| "logps/chosen": -231.5720672607422, |
| "logps/rejected": -216.5135498046875, |
| "loss": 0.6622, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.7699011564254761, |
| "rewards/margins": 0.8483353853225708, |
| "rewards/rejected": -0.07843427360057831, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7453416149068323, |
| "grad_norm": 46.22604593677178, |
| "learning_rate": 9.206267664155906e-07, |
| "logits/chosen": -2.4077823162078857, |
| "logits/rejected": -2.4088187217712402, |
| "logps/chosen": -260.6187744140625, |
| "logps/rejected": -240.7838897705078, |
| "loss": 0.6343, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.9798136949539185, |
| "rewards/margins": 1.1557605266571045, |
| "rewards/rejected": -0.17594675719738007, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 45.738597782002316, |
| "learning_rate": 8.778747871771291e-07, |
| "logits/chosen": -2.4890403747558594, |
| "logits/rejected": -2.4890661239624023, |
| "logps/chosen": -267.4264831542969, |
| "logps/rejected": -236.0730743408203, |
| "loss": 0.6245, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": 0.5028200745582581, |
| "rewards/margins": 1.037217617034912, |
| "rewards/rejected": -0.5343974232673645, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.9937888198757764, |
| "grad_norm": 40.58782325478915, |
| "learning_rate": 8.274303669726426e-07, |
| "logits/chosen": -2.464543104171753, |
| "logits/rejected": -2.45270037651062, |
| "logps/chosen": -246.43997192382812, |
| "logps/rejected": -244.5944366455078, |
| "loss": 0.6076, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": 0.423252671957016, |
| "rewards/margins": 0.9229713678359985, |
| "rewards/rejected": -0.49971866607666016, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.1180124223602483, |
| "grad_norm": 21.75862492001889, |
| "learning_rate": 7.703204087277988e-07, |
| "logits/chosen": -2.437509059906006, |
| "logits/rejected": -2.4511005878448486, |
| "logps/chosen": -238.031005859375, |
| "logps/rejected": -239.1492462158203, |
| "loss": 0.2467, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 1.3256285190582275, |
| "rewards/margins": 2.4453110694885254, |
| "rewards/rejected": -1.1196826696395874, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.2422360248447206, |
| "grad_norm": 22.132976615768026, |
| "learning_rate": 7.077075065009433e-07, |
| "logits/chosen": -2.484419822692871, |
| "logits/rejected": -2.485710859298706, |
| "logps/chosen": -241.15200805664062, |
| "logps/rejected": -220.26907348632812, |
| "loss": 0.2364, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 1.7490062713623047, |
| "rewards/margins": 2.5016419887542725, |
| "rewards/rejected": -0.7526359558105469, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.2422360248447206, |
| "eval_logits/chosen": -2.5087192058563232, |
| "eval_logits/rejected": -2.515753746032715, |
| "eval_logps/chosen": -249.35264587402344, |
| "eval_logps/rejected": -202.0917205810547, |
| "eval_loss": 0.5729268789291382, |
| "eval_rewards/accuracies": 0.7291666865348816, |
| "eval_rewards/chosen": 1.2346218824386597, |
| "eval_rewards/margins": 1.0999401807785034, |
| "eval_rewards/rejected": 0.13468176126480103, |
| "eval_runtime": 75.094, |
| "eval_samples_per_second": 15.181, |
| "eval_steps_per_second": 0.24, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.3664596273291925, |
| "grad_norm": 17.330600410265617, |
| "learning_rate": 6.408662784207149e-07, |
| "logits/chosen": -2.4883952140808105, |
| "logits/rejected": -2.482597827911377, |
| "logps/chosen": -236.4322052001953, |
| "logps/rejected": -229.5203094482422, |
| "loss": 0.2196, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 2.2716193199157715, |
| "rewards/margins": 3.196570634841919, |
| "rewards/rejected": -0.9249511957168579, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.4906832298136645, |
| "grad_norm": 17.716660362051734, |
| "learning_rate": 5.711574191366427e-07, |
| "logits/chosen": -2.468207836151123, |
| "logits/rejected": -2.4725213050842285, |
| "logps/chosen": -222.43896484375, |
| "logps/rejected": -200.0598602294922, |
| "loss": 0.2119, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 2.1822891235351562, |
| "rewards/margins": 2.8318796157836914, |
| "rewards/rejected": -0.6495904922485352, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.6149068322981366, |
| "grad_norm": 23.32765774015972, |
| "learning_rate": 5e-07, |
| "logits/chosen": -2.5094847679138184, |
| "logits/rejected": -2.5222580432891846, |
| "logps/chosen": -236.4397430419922, |
| "logps/rejected": -216.79052734375, |
| "loss": 0.2118, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": 2.586951494216919, |
| "rewards/margins": 3.453221559524536, |
| "rewards/rejected": -0.8662700653076172, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.7391304347826086, |
| "grad_norm": 25.27024243839641, |
| "learning_rate": 4.2884258086335745e-07, |
| "logits/chosen": -2.506361484527588, |
| "logits/rejected": -2.4916276931762695, |
| "logps/chosen": -228.9758758544922, |
| "logps/rejected": -225.41006469726562, |
| "loss": 0.2294, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 2.325880527496338, |
| "rewards/margins": 3.3348236083984375, |
| "rewards/rejected": -1.0089433193206787, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.8633540372670807, |
| "grad_norm": 27.860922972380834, |
| "learning_rate": 3.591337215792851e-07, |
| "logits/chosen": -2.5197181701660156, |
| "logits/rejected": -2.5090882778167725, |
| "logps/chosen": -239.81277465820312, |
| "logps/rejected": -230.70059204101562, |
| "loss": 0.265, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 2.431699275970459, |
| "rewards/margins": 3.363804340362549, |
| "rewards/rejected": -0.9321050643920898, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.9875776397515528, |
| "grad_norm": 17.61138833178944, |
| "learning_rate": 2.922924934990568e-07, |
| "logits/chosen": -2.543259382247925, |
| "logits/rejected": -2.493022918701172, |
| "logps/chosen": -237.87887573242188, |
| "logps/rejected": -279.49261474609375, |
| "loss": 0.2128, |
| "rewards/accuracies": 0.918749988079071, |
| "rewards/chosen": 2.389310836791992, |
| "rewards/margins": 4.381407260894775, |
| "rewards/rejected": -1.9920963048934937, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.111801242236025, |
| "grad_norm": 13.031340899683215, |
| "learning_rate": 2.2967959127220137e-07, |
| "logits/chosen": -2.5387518405914307, |
| "logits/rejected": -2.5558295249938965, |
| "logps/chosen": -225.1177520751953, |
| "logps/rejected": -233.97073364257812, |
| "loss": 0.1297, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": 2.17976713180542, |
| "rewards/margins": 3.992032527923584, |
| "rewards/rejected": -1.812265396118164, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.2360248447204967, |
| "grad_norm": 16.033450629688048, |
| "learning_rate": 1.725696330273575e-07, |
| "logits/chosen": -2.5489468574523926, |
| "logits/rejected": -2.5377697944641113, |
| "logps/chosen": -255.279296875, |
| "logps/rejected": -245.3787078857422, |
| "loss": 0.123, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 2.2218708992004395, |
| "rewards/margins": 4.716561794281006, |
| "rewards/rejected": -2.4946906566619873, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.360248447204969, |
| "grad_norm": 12.307639352242482, |
| "learning_rate": 1.2212521282287093e-07, |
| "logits/chosen": -2.5559678077697754, |
| "logits/rejected": -2.5754735469818115, |
| "logps/chosen": -240.5291748046875, |
| "logps/rejected": -270.05230712890625, |
| "loss": 0.1073, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 1.9212055206298828, |
| "rewards/margins": 4.681941032409668, |
| "rewards/rejected": -2.760735511779785, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.4844720496894412, |
| "grad_norm": 12.425204577942079, |
| "learning_rate": 7.937323358440934e-08, |
| "logits/chosen": -2.549752950668335, |
| "logits/rejected": -2.5533714294433594, |
| "logps/chosen": -235.24368286132812, |
| "logps/rejected": -259.5509033203125, |
| "loss": 0.1061, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": 1.9440858364105225, |
| "rewards/margins": 4.815189361572266, |
| "rewards/rejected": -2.8711037635803223, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.4844720496894412, |
| "eval_logits/chosen": -2.55989933013916, |
| "eval_logits/rejected": -2.5775303840637207, |
| "eval_logps/chosen": -254.12814331054688, |
| "eval_logps/rejected": -212.31497192382812, |
| "eval_loss": 0.6158778071403503, |
| "eval_rewards/accuracies": 0.7569444179534912, |
| "eval_rewards/chosen": 0.7570738792419434, |
| "eval_rewards/margins": 1.6447181701660156, |
| "eval_rewards/rejected": -0.8876442313194275, |
| "eval_runtime": 74.8651, |
| "eval_samples_per_second": 15.227, |
| "eval_steps_per_second": 0.24, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 120, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1178822762299392.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|