| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9984, | |
| "eval_steps": 500, | |
| "global_step": 156, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 46.9622688293457, | |
| "learning_rate": 6.249999999999999e-07, | |
| "logits/chosen": -1.849869966506958, | |
| "logits/rejected": -0.29363900423049927, | |
| "logps/chosen": -214.13339233398438, | |
| "logps/rejected": -737.3911743164062, | |
| "loss": 0.7092, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.004342720843851566, | |
| "rewards/margins": 0.025443650782108307, | |
| "rewards/rejected": -0.021100929006934166, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 13.521223068237305, | |
| "learning_rate": 9.979871469976195e-07, | |
| "logits/chosen": -1.8478429317474365, | |
| "logits/rejected": -0.2751621603965759, | |
| "logps/chosen": -240.21755981445312, | |
| "logps/rejected": -844.638427734375, | |
| "loss": 0.486, | |
| "rewards/accuracies": 0.9468750357627869, | |
| "rewards/chosen": 0.0402272529900074, | |
| "rewards/margins": 0.6919995546340942, | |
| "rewards/rejected": -0.6517722606658936, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.762617290019989, | |
| "learning_rate": 9.755282581475767e-07, | |
| "logits/chosen": -2.1506738662719727, | |
| "logits/rejected": -0.8184519410133362, | |
| "logps/chosen": -241.9251251220703, | |
| "logps/rejected": -829.9989624023438, | |
| "loss": 0.1228, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -0.33827781677246094, | |
| "rewards/margins": 4.653146266937256, | |
| "rewards/rejected": -4.991424083709717, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 1.0136756896972656, | |
| "learning_rate": 9.29224396800933e-07, | |
| "logits/chosen": -2.6077799797058105, | |
| "logits/rejected": -1.6289113759994507, | |
| "logps/chosen": -261.08819580078125, | |
| "logps/rejected": -949.7811889648438, | |
| "loss": 0.0522, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": -2.5243332386016846, | |
| "rewards/margins": 14.23307991027832, | |
| "rewards/rejected": -16.75741195678711, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.15248946845531464, | |
| "learning_rate": 8.613974319136957e-07, | |
| "logits/chosen": -2.809011459350586, | |
| "logits/rejected": -1.9814908504486084, | |
| "logps/chosen": -262.7459411621094, | |
| "logps/rejected": -1068.808349609375, | |
| "loss": 0.0401, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": -4.401086807250977, | |
| "rewards/margins": 23.641544342041016, | |
| "rewards/rejected": -28.04262924194336, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.12552589178085327, | |
| "learning_rate": 7.754484907260512e-07, | |
| "logits/chosen": -2.7779171466827393, | |
| "logits/rejected": -2.0284547805786133, | |
| "logps/chosen": -282.9354248046875, | |
| "logps/rejected": -1090.5594482421875, | |
| "loss": 0.036, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": -5.416067123413086, | |
| "rewards/margins": 25.74221420288086, | |
| "rewards/rejected": -31.158283233642578, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.17482662200927734, | |
| "learning_rate": 6.756874120406714e-07, | |
| "logits/chosen": -2.7689507007598877, | |
| "logits/rejected": -1.9573109149932861, | |
| "logps/chosen": -263.86279296875, | |
| "logps/rejected": -1074.0966796875, | |
| "loss": 0.0455, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": -4.39678955078125, | |
| "rewards/margins": 24.96966552734375, | |
| "rewards/rejected": -29.366456985473633, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.11299557238817215, | |
| "learning_rate": 5.671166329088277e-07, | |
| "logits/chosen": -2.643343448638916, | |
| "logits/rejected": -1.8395075798034668, | |
| "logps/chosen": -271.737060546875, | |
| "logps/rejected": -1051.107421875, | |
| "loss": 0.0341, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.8055081367492676, | |
| "rewards/margins": 22.575603485107422, | |
| "rewards/rejected": -26.381113052368164, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.12716512382030487, | |
| "learning_rate": 4.5518034554828327e-07, | |
| "logits/chosen": -2.627561092376709, | |
| "logits/rejected": -1.8044594526290894, | |
| "logps/chosen": -267.9153747558594, | |
| "logps/rejected": -1012.906494140625, | |
| "loss": 0.046, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": -3.2899169921875, | |
| "rewards/margins": 20.98080825805664, | |
| "rewards/rejected": -24.270723342895508, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.14720195531845093, | |
| "learning_rate": 3.454915028125263e-07, | |
| "logits/chosen": -2.617551803588867, | |
| "logits/rejected": -1.7380447387695312, | |
| "logps/chosen": -254.84658813476562, | |
| "logps/rejected": -996.9198608398438, | |
| "loss": 0.0315, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.6352035999298096, | |
| "rewards/margins": 19.858867645263672, | |
| "rewards/rejected": -22.49407196044922, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.1784060150384903, | |
| "learning_rate": 2.4355036129704696e-07, | |
| "logits/chosen": -2.598905324935913, | |
| "logits/rejected": -1.7216695547103882, | |
| "logps/chosen": -248.74453735351562, | |
| "logps/rejected": -993.6848754882812, | |
| "loss": 0.036, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.5882961750030518, | |
| "rewards/margins": 18.992816925048828, | |
| "rewards/rejected": -21.58111572265625, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.768, | |
| "grad_norm": 0.11419311910867691, | |
| "learning_rate": 1.5446867550656767e-07, | |
| "logits/chosen": -2.5767769813537598, | |
| "logits/rejected": -1.681131362915039, | |
| "logps/chosen": -256.5839538574219, | |
| "logps/rejected": -994.3280029296875, | |
| "loss": 0.0745, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.4366414546966553, | |
| "rewards/margins": 18.98566436767578, | |
| "rewards/rejected": -21.422306060791016, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.832, | |
| "grad_norm": 0.10517225414514542, | |
| "learning_rate": 8.271337313934867e-08, | |
| "logits/chosen": -2.5168538093566895, | |
| "logits/rejected": -1.6293193101882935, | |
| "logps/chosen": -269.16278076171875, | |
| "logps/rejected": -979.6190795898438, | |
| "loss": 0.0499, | |
| "rewards/accuracies": 0.984375, | |
| "rewards/chosen": -2.368694543838501, | |
| "rewards/margins": 18.073989868164062, | |
| "rewards/rejected": -20.442684173583984, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.896, | |
| "grad_norm": 0.1312648206949234, | |
| "learning_rate": 3.188256468013139e-08, | |
| "logits/chosen": -2.5698208808898926, | |
| "logits/rejected": -1.6306824684143066, | |
| "logps/chosen": -250.3894805908203, | |
| "logps/rejected": -1000.3046875, | |
| "loss": 0.0339, | |
| "rewards/accuracies": 0.996874988079071, | |
| "rewards/chosen": -2.0379717350006104, | |
| "rewards/margins": 18.880815505981445, | |
| "rewards/rejected": -20.918787002563477, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.11260352283716202, | |
| "learning_rate": 4.5251191160326495e-09, | |
| "logits/chosen": -2.5329906940460205, | |
| "logits/rejected": -1.5752902030944824, | |
| "logps/chosen": -271.48443603515625, | |
| "logps/rejected": -1053.84765625, | |
| "loss": 0.0735, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": -2.3061203956604004, | |
| "rewards/margins": 19.025531768798828, | |
| "rewards/rejected": -21.331653594970703, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.9984, | |
| "step": 156, | |
| "total_flos": 1.1115841451898962e+18, | |
| "train_loss": 0.1217762088546386, | |
| "train_runtime": 6099.7709, | |
| "train_samples_per_second": 0.82, | |
| "train_steps_per_second": 0.026 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 156, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1115841451898962e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |