{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999917239096251,
  "eval_steps": 500,
  "global_step": 6041,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05015310767193578,
      "grad_norm": 4.3741655349731445,
      "learning_rate": 5.008264462809917e-07,
      "logits/chosen": -0.5391724109649658,
      "logits/rejected": -0.4776774048805237,
      "logps/chosen": -73.29891204833984,
      "logps/rejected": -13.118536949157715,
      "loss": 0.684,
      "rewards/accuracies": 0.589108943939209,
      "rewards/chosen": 0.016623780131340027,
      "rewards/margins": 0.01931975968182087,
      "rewards/rejected": -0.0026959802489727736,
      "step": 303
    },
    {
      "epoch": 0.10030621534387156,
      "grad_norm": 3.5748157501220703,
      "learning_rate": 9.998160412067696e-07,
      "logits/chosen": -0.5328251719474792,
      "logits/rejected": -0.4811299741268158,
      "logps/chosen": -66.50557708740234,
      "logps/rejected": -15.574967384338379,
      "loss": 0.4581,
      "rewards/accuracies": 0.8675742745399475,
      "rewards/chosen": 0.6051633954048157,
      "rewards/margins": 0.8234596848487854,
      "rewards/rejected": -0.21829627454280853,
      "step": 606
    },
    {
      "epoch": 0.15045932301580733,
      "grad_norm": 3.891946315765381,
      "learning_rate": 9.440765268579838e-07,
      "logits/chosen": -0.5248011946678162,
      "logits/rejected": -0.4659003019332886,
      "logps/chosen": -61.22736358642578,
      "logps/rejected": -34.781681060791016,
      "loss": 0.0781,
      "rewards/accuracies": 0.9843234419822693,
      "rewards/chosen": 1.5499842166900635,
      "rewards/margins": 3.709665298461914,
      "rewards/rejected": -2.1596810817718506,
      "step": 909
    },
    {
      "epoch": 0.20061243068774312,
      "grad_norm": 0.11657057702541351,
      "learning_rate": 8.883370125091979e-07,
      "logits/chosen": -0.48526012897491455,
      "logits/rejected": -0.4090143144130707,
      "logps/chosen": -56.97617721557617,
      "logps/rejected": -48.22561264038086,
      "loss": 0.0338,
      "rewards/accuracies": 0.9867987036705017,
      "rewards/chosen": 1.691601276397705,
      "rewards/margins": 5.236583709716797,
      "rewards/rejected": -3.544982433319092,
      "step": 1212
    },
    {
      "epoch": 0.2507655383596789,
      "grad_norm": 0.1646522730588913,
      "learning_rate": 8.325974981604121e-07,
      "logits/chosen": -0.43419143557548523,
      "logits/rejected": -0.3306835889816284,
      "logps/chosen": -56.835514068603516,
      "logps/rejected": -55.82384490966797,
      "loss": 0.027,
      "rewards/accuracies": 0.9913366436958313,
      "rewards/chosen": 1.7533217668533325,
      "rewards/margins": 6.046737194061279,
      "rewards/rejected": -4.2934160232543945,
      "step": 1515
    },
    {
      "epoch": 0.30091864603161467,
      "grad_norm": 16.816612243652344,
      "learning_rate": 7.768579838116262e-07,
      "logits/chosen": -0.4011126756668091,
      "logits/rejected": -0.2737236022949219,
      "logps/chosen": -55.48396301269531,
      "logps/rejected": -65.16657257080078,
      "loss": 0.0307,
      "rewards/accuracies": 0.9892739653587341,
      "rewards/chosen": 1.696967601776123,
      "rewards/margins": 6.927124500274658,
      "rewards/rejected": -5.230156898498535,
      "step": 1818
    },
    {
      "epoch": 0.3510717537035504,
      "grad_norm": 2.5598514080047607,
      "learning_rate": 7.211184694628402e-07,
      "logits/chosen": -0.37529540061950684,
      "logits/rejected": -0.23136167228221893,
      "logps/chosen": -57.458560943603516,
      "logps/rejected": -76.030517578125,
      "loss": 0.0153,
      "rewards/accuracies": 0.9954620599746704,
      "rewards/chosen": 1.6613062620162964,
      "rewards/margins": 7.948617935180664,
      "rewards/rejected": -6.287312030792236,
      "step": 2121
    },
    {
      "epoch": 0.40122486137548624,
      "grad_norm": 0.03665272891521454,
      "learning_rate": 6.653789551140544e-07,
      "logits/chosen": -0.32625895738601685,
      "logits/rejected": -0.1696743667125702,
      "logps/chosen": -56.47751998901367,
      "logps/rejected": -82.79418182373047,
      "loss": 0.0185,
      "rewards/accuracies": 0.9929868578910828,
      "rewards/chosen": 1.56876802444458,
      "rewards/margins": 8.616827964782715,
      "rewards/rejected": -7.048060417175293,
      "step": 2424
    },
    {
      "epoch": 0.451377969047422,
      "grad_norm": 0.0068373712711036205,
      "learning_rate": 6.096394407652685e-07,
      "logits/chosen": -0.33455565571784973,
      "logits/rejected": -0.17350350320339203,
      "logps/chosen": -57.92192840576172,
      "logps/rejected": -88.568603515625,
      "loss": 0.0217,
      "rewards/accuracies": 0.9913366436958313,
      "rewards/chosen": 1.5466176271438599,
      "rewards/margins": 9.043907165527344,
      "rewards/rejected": -7.497289657592773,
      "step": 2727
    },
    {
      "epoch": 0.5015310767193578,
      "grad_norm": 1.1985265016555786,
      "learning_rate": 5.538999264164827e-07,
      "logits/chosen": -0.32679906487464905,
      "logits/rejected": -0.16868844628334045,
      "logps/chosen": -57.272857666015625,
      "logps/rejected": -90.3719711303711,
      "loss": 0.0215,
      "rewards/accuracies": 0.9929868578910828,
      "rewards/chosen": 1.4742236137390137,
      "rewards/margins": 9.2846097946167,
      "rewards/rejected": -7.8103861808776855,
      "step": 3030
    },
    {
      "epoch": 0.5516841843912935,
      "grad_norm": 0.004047638736665249,
      "learning_rate": 4.981604120676968e-07,
      "logits/chosen": -0.3204115033149719,
      "logits/rejected": -0.17468391358852386,
      "logps/chosen": -58.51227951049805,
      "logps/rejected": -94.06062316894531,
      "loss": 0.0278,
      "rewards/accuracies": 0.9905115962028503,
      "rewards/chosen": 1.527059555053711,
      "rewards/margins": 9.519412994384766,
      "rewards/rejected": -7.992353916168213,
      "step": 3333
    },
    {
      "epoch": 0.6018372920632293,
      "grad_norm": 0.3182278573513031,
      "learning_rate": 4.4242089771891094e-07,
      "logits/chosen": -0.3149339258670807,
      "logits/rejected": -0.15087205171585083,
      "logps/chosen": -57.350006103515625,
      "logps/rejected": -94.76825714111328,
      "loss": 0.0276,
      "rewards/accuracies": 0.9900990128517151,
      "rewards/chosen": 1.4863520860671997,
      "rewards/margins": 9.664637565612793,
      "rewards/rejected": -8.178285598754883,
      "step": 3636
    },
    {
      "epoch": 0.6519903997351651,
      "grad_norm": 0.016533929854631424,
      "learning_rate": 3.866813833701251e-07,
      "logits/chosen": -0.3052721619606018,
      "logits/rejected": -0.1389181911945343,
      "logps/chosen": -58.16946792602539,
      "logps/rejected": -95.81718444824219,
      "loss": 0.0245,
      "rewards/accuracies": 0.9909241199493408,
      "rewards/chosen": 1.4919017553329468,
      "rewards/margins": 9.79186725616455,
      "rewards/rejected": -8.299964904785156,
      "step": 3939
    },
    {
      "epoch": 0.7021435074071009,
      "grad_norm": 0.010237179696559906,
      "learning_rate": 3.3094186902133917e-07,
      "logits/chosen": -0.3208546042442322,
      "logits/rejected": -0.1598815768957138,
      "logps/chosen": -58.12910079956055,
      "logps/rejected": -97.86547088623047,
      "loss": 0.0245,
      "rewards/accuracies": 0.9913366436958313,
      "rewards/chosen": 1.4368475675582886,
      "rewards/margins": 9.900219917297363,
      "rewards/rejected": -8.463372230529785,
      "step": 4242
    },
    {
      "epoch": 0.7522966150790367,
      "grad_norm": 0.008713229559361935,
      "learning_rate": 2.752023546725533e-07,
      "logits/chosen": -0.3229035437107086,
      "logits/rejected": -0.15609696507453918,
      "logps/chosen": -57.847740173339844,
      "logps/rejected": -98.02397918701172,
      "loss": 0.019,
      "rewards/accuracies": 0.9925742745399475,
      "rewards/chosen": 1.491112232208252,
      "rewards/margins": 10.048946380615234,
      "rewards/rejected": -8.557833671569824,
      "step": 4545
    },
    {
      "epoch": 0.8024497227509725,
      "grad_norm": 0.0016923310467973351,
      "learning_rate": 2.1946284032376748e-07,
      "logits/chosen": -0.3198649287223816,
      "logits/rejected": -0.1555498093366623,
      "logps/chosen": -57.79204177856445,
      "logps/rejected": -97.5900650024414,
      "loss": 0.0152,
      "rewards/accuracies": 0.9958746433258057,
      "rewards/chosen": 1.5458145141601562,
      "rewards/margins": 9.990068435668945,
      "rewards/rejected": -8.444254875183105,
      "step": 4848
    },
    {
      "epoch": 0.8526028304229082,
      "grad_norm": 0.005034138448536396,
      "learning_rate": 1.637233259749816e-07,
      "logits/chosen": -0.31206631660461426,
      "logits/rejected": -0.13810566067695618,
      "logps/chosen": -60.38402557373047,
      "logps/rejected": -97.9999008178711,
      "loss": 0.0182,
      "rewards/accuracies": 0.9938119053840637,
      "rewards/chosen": 1.5842220783233643,
      "rewards/margins": 10.07706356048584,
      "rewards/rejected": -8.492840766906738,
      "step": 5151
    },
    {
      "epoch": 0.902755938094844,
      "grad_norm": 0.01207835040986538,
      "learning_rate": 1.0798381162619573e-07,
      "logits/chosen": -0.3065117299556732,
      "logits/rejected": -0.13630090653896332,
      "logps/chosen": -59.60470199584961,
      "logps/rejected": -98.72499084472656,
      "loss": 0.0196,
      "rewards/accuracies": 0.9933993816375732,
      "rewards/chosen": 1.5179524421691895,
      "rewards/margins": 10.089001655578613,
      "rewards/rejected": -8.571049690246582,
      "step": 5454
    },
    {
      "epoch": 0.9529090457667798,
      "grad_norm": 0.008624515496194363,
      "learning_rate": 5.224429727740986e-08,
      "logits/chosen": -0.298076331615448,
      "logits/rejected": -0.12163959443569183,
      "logps/chosen": -61.02180862426758,
      "logps/rejected": -98.43746185302734,
      "loss": 0.0201,
      "rewards/accuracies": 0.9942244291305542,
      "rewards/chosen": 1.5929877758026123,
      "rewards/margins": 10.118744850158691,
      "rewards/rejected": -8.525758743286133,
      "step": 5757
    },
    {
      "epoch": 0.999917239096251,
      "step": 6041,
      "total_flos": 3.6566231979141366e+18,
      "train_loss": 0.08035980544132106,
      "train_runtime": 90068.102,
      "train_samples_per_second": 0.537,
      "train_steps_per_second": 0.067
    }
  ],
  "logging_steps": 303,
  "max_steps": 6041,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.6566231979141366e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}