| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 309, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003236245954692557, |
| "grad_norm": 44.84460324756473, |
| "learning_rate": 1.6129032258064514e-08, |
| "logits/chosen": -0.20905712246894836, |
| "logits/rejected": -0.22190234065055847, |
| "logps/chosen": -51.62083435058594, |
| "logps/rejected": -51.69921112060547, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.032362459546925564, |
| "grad_norm": 40.23570147271882, |
| "learning_rate": 1.6129032258064515e-07, |
| "logits/chosen": -0.5052363872528076, |
| "logits/rejected": -0.4759008586406708, |
| "logps/chosen": -117.98110961914062, |
| "logps/rejected": -115.17385864257812, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.005904653575271368, |
| "rewards/margins": -0.005229531321674585, |
| "rewards/rejected": -0.0006751217297278345, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06472491909385113, |
| "grad_norm": 40.64958006423696, |
| "learning_rate": 3.225806451612903e-07, |
| "logits/chosen": -0.34268108010292053, |
| "logits/rejected": -0.32415661215782166, |
| "logps/chosen": -89.46002960205078, |
| "logps/rejected": -90.85234069824219, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.012587952427566051, |
| "rewards/margins": -0.0015765244606882334, |
| "rewards/rejected": 0.014164477586746216, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 44.44417549342928, |
| "learning_rate": 4.838709677419355e-07, |
| "logits/chosen": -0.3697855770587921, |
| "logits/rejected": -0.37569430470466614, |
| "logps/chosen": -91.7381362915039, |
| "logps/rejected": -120.64210510253906, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": 0.040541667491197586, |
| "rewards/margins": -0.00646995147690177, |
| "rewards/rejected": 0.04701162129640579, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 45.39432157926112, |
| "learning_rate": 4.838129496402878e-07, |
| "logits/chosen": -0.5134055614471436, |
| "logits/rejected": -0.5195242166519165, |
| "logps/chosen": -112.23564147949219, |
| "logps/rejected": -112.45448303222656, |
| "loss": 0.6845, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.1366191953420639, |
| "rewards/margins": 0.0211743526160717, |
| "rewards/rejected": 0.1154448390007019, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16181229773462782, |
| "grad_norm": 46.657888703309055, |
| "learning_rate": 4.6582733812949637e-07, |
| "logits/chosen": -0.49087825417518616, |
| "logits/rejected": -0.48370131850242615, |
| "logps/chosen": -108.74371337890625, |
| "logps/rejected": -108.59181213378906, |
| "loss": 0.6816, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.28128570318222046, |
| "rewards/margins": 0.055680472403764725, |
| "rewards/rejected": 0.22560521960258484, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 45.70846809487506, |
| "learning_rate": 4.4784172661870503e-07, |
| "logits/chosen": -0.5000173449516296, |
| "logits/rejected": -0.44659870862960815, |
| "logps/chosen": -109.87890625, |
| "logps/rejected": -103.29914855957031, |
| "loss": 0.6765, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.34991931915283203, |
| "rewards/margins": -0.002201000927016139, |
| "rewards/rejected": 0.3521203100681305, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.22653721682847897, |
| "grad_norm": 46.326027074357874, |
| "learning_rate": 4.2985611510791364e-07, |
| "logits/chosen": -0.42392462491989136, |
| "logits/rejected": -0.4413270056247711, |
| "logps/chosen": -106.632568359375, |
| "logps/rejected": -116.46354675292969, |
| "loss": 0.6766, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.4661247134208679, |
| "rewards/margins": 0.0015508796786889434, |
| "rewards/rejected": 0.4645739197731018, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 43.498065814007035, |
| "learning_rate": 4.118705035971223e-07, |
| "logits/chosen": -0.3981110453605652, |
| "logits/rejected": -0.38208064436912537, |
| "logps/chosen": -77.17626953125, |
| "logps/rejected": -82.09029388427734, |
| "loss": 0.673, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.3067065179347992, |
| "rewards/margins": 0.007025508675724268, |
| "rewards/rejected": 0.29968103766441345, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 44.075241479789, |
| "learning_rate": 3.938848920863309e-07, |
| "logits/chosen": -0.34064334630966187, |
| "logits/rejected": -0.39144274592399597, |
| "logps/chosen": -92.52304077148438, |
| "logps/rejected": -98.45548248291016, |
| "loss": 0.6704, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.3761943280696869, |
| "rewards/margins": 0.042388152331113815, |
| "rewards/rejected": 0.33380621671676636, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.32362459546925565, |
| "grad_norm": 48.0790851847465, |
| "learning_rate": 3.7589928057553957e-07, |
| "logits/chosen": -0.37802955508232117, |
| "logits/rejected": -0.4160170555114746, |
| "logps/chosen": -113.50125885009766, |
| "logps/rejected": -121.9982681274414, |
| "loss": 0.6773, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.6942413449287415, |
| "rewards/margins": 0.051541488617658615, |
| "rewards/rejected": 0.6426998972892761, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3559870550161812, |
| "grad_norm": 46.010936459755236, |
| "learning_rate": 3.579136690647482e-07, |
| "logits/chosen": -0.42405351996421814, |
| "logits/rejected": -0.40491142868995667, |
| "logps/chosen": -103.0141830444336, |
| "logps/rejected": -107.39582824707031, |
| "loss": 0.6689, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.49587029218673706, |
| "rewards/margins": 0.06293975561857224, |
| "rewards/rejected": 0.4329305589199066, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 45.68665961151184, |
| "learning_rate": 3.3992805755395684e-07, |
| "logits/chosen": -0.4767892360687256, |
| "logits/rejected": -0.4402199387550354, |
| "logps/chosen": -88.38746643066406, |
| "logps/rejected": -92.61766052246094, |
| "loss": 0.6499, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.7922600507736206, |
| "rewards/margins": 0.2213023602962494, |
| "rewards/rejected": 0.5709576606750488, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.42071197411003236, |
| "grad_norm": 50.16095862731605, |
| "learning_rate": 3.2194244604316545e-07, |
| "logits/chosen": -0.37711650133132935, |
| "logits/rejected": -0.3402002155780792, |
| "logps/chosen": -90.22920227050781, |
| "logps/rejected": -94.65870666503906, |
| "loss": 0.674, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.6651551127433777, |
| "rewards/margins": 0.20479026436805725, |
| "rewards/rejected": 0.4603648781776428, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.45307443365695793, |
| "grad_norm": 48.808328658203315, |
| "learning_rate": 3.039568345323741e-07, |
| "logits/chosen": -0.43002423644065857, |
| "logits/rejected": -0.45816200971603394, |
| "logps/chosen": -113.88044738769531, |
| "logps/rejected": -114.7729263305664, |
| "loss": 0.6533, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.5868810415267944, |
| "rewards/margins": 0.1377699077129364, |
| "rewards/rejected": 0.44911113381385803, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 49.604479223239935, |
| "learning_rate": 2.859712230215827e-07, |
| "logits/chosen": -0.45960181951522827, |
| "logits/rejected": -0.4378342628479004, |
| "logps/chosen": -84.22222900390625, |
| "logps/rejected": -86.26544189453125, |
| "loss": 0.6639, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.5674360990524292, |
| "rewards/margins": 0.11405378580093384, |
| "rewards/rejected": 0.45338231325149536, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.517799352750809, |
| "grad_norm": 47.42218438472324, |
| "learning_rate": 2.679856115107914e-07, |
| "logits/chosen": -0.3416453003883362, |
| "logits/rejected": -0.3230029344558716, |
| "logps/chosen": -80.31494903564453, |
| "logps/rejected": -82.15327453613281, |
| "loss": 0.6573, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.586269736289978, |
| "rewards/margins": 0.0887608677148819, |
| "rewards/rejected": 0.49750882387161255, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5501618122977346, |
| "grad_norm": 47.03382774502366, |
| "learning_rate": 2.5e-07, |
| "logits/chosen": -0.41951996088027954, |
| "logits/rejected": -0.3912803530693054, |
| "logps/chosen": -87.18314361572266, |
| "logps/rejected": -93.79847717285156, |
| "loss": 0.6592, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.5363109707832336, |
| "rewards/margins": 0.067426897585392, |
| "rewards/rejected": 0.4688839912414551, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 47.69366741108912, |
| "learning_rate": 2.3201438848920862e-07, |
| "logits/chosen": -0.3929893374443054, |
| "logits/rejected": -0.4231534004211426, |
| "logps/chosen": -126.5281753540039, |
| "logps/rejected": -131.83119201660156, |
| "loss": 0.6558, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.5387030839920044, |
| "rewards/margins": 0.09579172730445862, |
| "rewards/rejected": 0.4429113268852234, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6148867313915858, |
| "grad_norm": 44.68169856182738, |
| "learning_rate": 2.1402877697841726e-07, |
| "logits/chosen": -0.44879454374313354, |
| "logits/rejected": -0.4319379925727844, |
| "logps/chosen": -92.65638732910156, |
| "logps/rejected": -87.41607666015625, |
| "loss": 0.6387, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.4598053991794586, |
| "rewards/margins": 0.2478960007429123, |
| "rewards/rejected": 0.21190936863422394, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6472491909385113, |
| "grad_norm": 52.43786206399067, |
| "learning_rate": 1.960431654676259e-07, |
| "logits/chosen": -0.4231666028499603, |
| "logits/rejected": -0.4151372015476227, |
| "logps/chosen": -89.03497314453125, |
| "logps/rejected": -94.46099853515625, |
| "loss": 0.638, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.564118504524231, |
| "rewards/margins": 0.19040581583976746, |
| "rewards/rejected": 0.3737126588821411, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 47.830444290918436, |
| "learning_rate": 1.7805755395683453e-07, |
| "logits/chosen": -0.37914031744003296, |
| "logits/rejected": -0.3839500844478607, |
| "logps/chosen": -104.35710144042969, |
| "logps/rejected": -101.65086364746094, |
| "loss": 0.6557, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.5819724202156067, |
| "rewards/margins": 0.11125577986240387, |
| "rewards/rejected": 0.470716655254364, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7119741100323624, |
| "grad_norm": 56.9972848370308, |
| "learning_rate": 1.6007194244604316e-07, |
| "logits/chosen": -0.4811418950557709, |
| "logits/rejected": -0.4631536900997162, |
| "logps/chosen": -79.82476806640625, |
| "logps/rejected": -77.16559600830078, |
| "loss": 0.6487, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.5332338213920593, |
| "rewards/margins": 0.061274897307157516, |
| "rewards/rejected": 0.47195887565612793, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7443365695792881, |
| "grad_norm": 40.95527477894272, |
| "learning_rate": 1.420863309352518e-07, |
| "logits/chosen": -0.4873018264770508, |
| "logits/rejected": -0.48534002900123596, |
| "logps/chosen": -97.24694061279297, |
| "logps/rejected": -99.6893310546875, |
| "loss": 0.6618, |
| "rewards/accuracies": 0.4000000059604645, |
| "rewards/chosen": 0.36339443922042847, |
| "rewards/margins": 0.03903265669941902, |
| "rewards/rejected": 0.32436177134513855, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 51.5213560789745, |
| "learning_rate": 1.2410071942446043e-07, |
| "logits/chosen": -0.49588823318481445, |
| "logits/rejected": -0.4999016225337982, |
| "logps/chosen": -109.93338775634766, |
| "logps/rejected": -112.18772888183594, |
| "loss": 0.6535, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.6505969762802124, |
| "rewards/margins": 0.1298864632844925, |
| "rewards/rejected": 0.5207104682922363, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8090614886731392, |
| "grad_norm": 46.03625656489629, |
| "learning_rate": 1.0611510791366907e-07, |
| "logits/chosen": -0.3793638348579407, |
| "logits/rejected": -0.38035714626312256, |
| "logps/chosen": -98.56913757324219, |
| "logps/rejected": -103.80790710449219, |
| "loss": 0.6503, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.5021631717681885, |
| "rewards/margins": 0.1653563678264618, |
| "rewards/rejected": 0.33680686354637146, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8414239482200647, |
| "grad_norm": 46.447929343856416, |
| "learning_rate": 8.812949640287769e-08, |
| "logits/chosen": -0.45624303817749023, |
| "logits/rejected": -0.4315834641456604, |
| "logps/chosen": -83.76708984375, |
| "logps/rejected": -94.65506744384766, |
| "loss": 0.6707, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.530864953994751, |
| "rewards/margins": 0.03900914266705513, |
| "rewards/rejected": 0.4918558597564697, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 48.805366010941604, |
| "learning_rate": 7.014388489208632e-08, |
| "logits/chosen": -0.42842593789100647, |
| "logits/rejected": -0.40372419357299805, |
| "logps/chosen": -96.93135833740234, |
| "logps/rejected": -99.37718200683594, |
| "loss": 0.6434, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": 0.4281619191169739, |
| "rewards/margins": 0.1487235724925995, |
| "rewards/rejected": 0.2794383466243744, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9061488673139159, |
| "grad_norm": 47.872792319475785, |
| "learning_rate": 5.2158273381294966e-08, |
| "logits/chosen": -0.4138847291469574, |
| "logits/rejected": -0.45293694734573364, |
| "logps/chosen": -102.27735137939453, |
| "logps/rejected": -107.4918212890625, |
| "loss": 0.6588, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.5359721183776855, |
| "rewards/margins": 0.1897757351398468, |
| "rewards/rejected": 0.34619635343551636, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9385113268608414, |
| "grad_norm": 47.988007458372486, |
| "learning_rate": 3.41726618705036e-08, |
| "logits/chosen": -0.4650436341762543, |
| "logits/rejected": -0.4407349228858948, |
| "logps/chosen": -141.83694458007812, |
| "logps/rejected": -126.32550048828125, |
| "loss": 0.6488, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.32771816849708557, |
| "rewards/margins": 0.28930023312568665, |
| "rewards/rejected": 0.038417913019657135, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 50.705167138943, |
| "learning_rate": 1.618705035971223e-08, |
| "logits/chosen": -0.4262828230857849, |
| "logits/rejected": -0.4605466425418854, |
| "logps/chosen": -109.3924560546875, |
| "logps/rejected": -106.82768249511719, |
| "loss": 0.6685, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.4629640579223633, |
| "rewards/margins": 0.06305359303951263, |
| "rewards/rejected": 0.39991044998168945, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 309, |
| "total_flos": 0.0, |
| "train_loss": 0.6613213452706445, |
| "train_runtime": 2759.9915, |
| "train_samples_per_second": 7.162, |
| "train_steps_per_second": 0.112 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 309, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|