| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9945, |
| "eval_steps": 500, |
| "global_step": 153, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.20925004399681482, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -1.6728180646896362, |
| "logits/rejected": -1.6728180646896362, |
| "logps/chosen": -139.26568603515625, |
| "logps/pi_response": -223.70187377929688, |
| "logps/ref_response": -223.70187377929688, |
| "logps/rejected": -139.26568603515625, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.07, |
| "eta": 0.0009999999310821295, |
| "grad_norm": 0.2664449122199502, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -1.9198987483978271, |
| "logits/rejected": -1.9198987483978271, |
| "logps/chosen": -185.7984161376953, |
| "logps/pi_response": -284.7489929199219, |
| "logps/ref_response": -274.8498229980469, |
| "logps/rejected": -185.7984161376953, |
| "loss": 0.693, |
| "rewards/accuracies": 0.09829059988260269, |
| "rewards/chosen": -0.05739467218518257, |
| "rewards/margins": -8.19944467878031e-09, |
| "rewards/rejected": -0.057394664734601974, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.17607425906899, |
| "learning_rate": 4.989490450759331e-07, |
| "logits/chosen": -1.2133342027664185, |
| "logits/rejected": -1.2133342027664185, |
| "logps/chosen": -344.1175231933594, |
| "logps/pi_response": -420.7164001464844, |
| "logps/ref_response": -268.8954772949219, |
| "logps/rejected": -344.1175231933594, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.17307692766189575, |
| "rewards/chosen": -1.5597572326660156, |
| "rewards/margins": 2.3523059056174134e-08, |
| "rewards/rejected": -1.5597573518753052, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9959359173659126, |
| "learning_rate": 4.872270441827174e-07, |
| "logits/chosen": 0.10036426037549973, |
| "logits/rejected": 0.10036426037549973, |
| "logps/chosen": -764.1023559570312, |
| "logps/pi_response": -850.1804809570312, |
| "logps/ref_response": -272.0489807128906, |
| "logps/rejected": -764.1023559570312, |
| "loss": 0.6873, |
| "rewards/accuracies": 0.11153846234083176, |
| "rewards/chosen": -5.858819007873535, |
| "rewards/margins": -3.943076620771535e-08, |
| "rewards/rejected": -5.858819484710693, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 1.9030553381279938, |
| "learning_rate": 4.6308512113530063e-07, |
| "logits/chosen": 2.6020889282226562, |
| "logits/rejected": 2.6020889282226562, |
| "logps/chosen": -15368.376953125, |
| "logps/pi_response": -9728.86328125, |
| "logps/ref_response": -275.9498596191406, |
| "logps/rejected": -15368.376953125, |
| "loss": 0.6, |
| "rewards/accuracies": 0.042307693511247635, |
| "rewards/chosen": -151.69932556152344, |
| "rewards/margins": 1.8339891028062993e-07, |
| "rewards/rejected": -151.69932556152344, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.33, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9022013856679871, |
| "learning_rate": 4.277872161641681e-07, |
| "logits/chosen": 5.100710391998291, |
| "logits/rejected": 5.100710391998291, |
| "logps/chosen": -25452.970703125, |
| "logps/pi_response": -16005.287109375, |
| "logps/ref_response": -266.91033935546875, |
| "logps/rejected": -25452.970703125, |
| "loss": 0.5304, |
| "rewards/accuracies": 0.023076923564076424, |
| "rewards/chosen": -252.7429656982422, |
| "rewards/margins": -4.69501202360334e-07, |
| "rewards/rejected": -252.7429656982422, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.39, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9078413895498612, |
| "learning_rate": 3.8318133624280046e-07, |
| "logits/chosen": 4.621513843536377, |
| "logits/rejected": 4.621513843536377, |
| "logps/chosen": -29423.666015625, |
| "logps/pi_response": -17608.6015625, |
| "logps/ref_response": -265.94757080078125, |
| "logps/rejected": -29423.666015625, |
| "loss": 0.519, |
| "rewards/accuracies": 0.03076923079788685, |
| "rewards/chosen": -292.3346862792969, |
| "rewards/margins": 6.455641710090276e-07, |
| "rewards/rejected": -292.3346862792969, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.46, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.7756777568132545, |
| "learning_rate": 3.316028034595861e-07, |
| "logits/chosen": 4.356642723083496, |
| "logits/rejected": 4.356642723083496, |
| "logps/chosen": -29171.62890625, |
| "logps/pi_response": -18937.943359375, |
| "logps/ref_response": -276.5423278808594, |
| "logps/rejected": -29171.62890625, |
| "loss": 0.5212, |
| "rewards/accuracies": 0.015384615398943424, |
| "rewards/chosen": -289.7953796386719, |
| "rewards/margins": 0.0, |
| "rewards/rejected": -289.7953796386719, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.52, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9174650913563618, |
| "learning_rate": 2.7575199021178855e-07, |
| "logits/chosen": 4.428848743438721, |
| "logits/rejected": 4.428848743438721, |
| "logps/chosen": -27704.791015625, |
| "logps/pi_response": -18176.86328125, |
| "logps/ref_response": -271.93060302734375, |
| "logps/rejected": -27704.791015625, |
| "loss": 0.5132, |
| "rewards/accuracies": 0.01923076994717121, |
| "rewards/chosen": -275.2251281738281, |
| "rewards/margins": -9.97690108306415e-07, |
| "rewards/rejected": -275.2251281738281, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9019855937057101, |
| "learning_rate": 2.1855294234408068e-07, |
| "logits/chosen": 4.3772735595703125, |
| "logits/rejected": 4.3772735595703125, |
| "logps/chosen": -29193.177734375, |
| "logps/pi_response": -17440.9921875, |
| "logps/ref_response": -266.1241455078125, |
| "logps/rejected": -29193.177734375, |
| "loss": 0.5231, |
| "rewards/accuracies": 0.03076923079788685, |
| "rewards/chosen": -290.08026123046875, |
| "rewards/margins": -2.9343825147520874e-08, |
| "rewards/rejected": -290.0802307128906, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.65, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.8179873354250534, |
| "learning_rate": 1.6300029195778453e-07, |
| "logits/chosen": 4.375646114349365, |
| "logits/rejected": 4.375646114349365, |
| "logps/chosen": -27328.33203125, |
| "logps/pi_response": -17462.0546875, |
| "logps/ref_response": -267.173828125, |
| "logps/rejected": -27328.33203125, |
| "loss": 0.5171, |
| "rewards/accuracies": 0.026923077180981636, |
| "rewards/chosen": -271.51708984375, |
| "rewards/margins": 1.613910427522569e-07, |
| "rewards/rejected": -271.51708984375, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.71, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9895798769556228, |
| "learning_rate": 1.1200247470632392e-07, |
| "logits/chosen": 4.529591083526611, |
| "logits/rejected": 4.529591083526611, |
| "logps/chosen": -28326.220703125, |
| "logps/pi_response": -18734.5078125, |
| "logps/ref_response": -291.685791015625, |
| "logps/rejected": -28326.220703125, |
| "loss": 0.5103, |
| "rewards/accuracies": 0.04615384712815285, |
| "rewards/chosen": -281.44744873046875, |
| "rewards/margins": 1.540550869094659e-07, |
| "rewards/rejected": -281.44744873046875, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.78, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.9157342554933109, |
| "learning_rate": 6.822945986946385e-08, |
| "logits/chosen": 4.574192523956299, |
| "logits/rejected": 4.574192523956299, |
| "logps/chosen": -29500.658203125, |
| "logps/pi_response": -18083.599609375, |
| "logps/ref_response": -266.3084716796875, |
| "logps/rejected": -29500.658203125, |
| "loss": 0.5148, |
| "rewards/accuracies": 0.05384615436196327, |
| "rewards/chosen": -293.0876159667969, |
| "rewards/margins": 1.4085036355027114e-06, |
| "rewards/rejected": -293.0876159667969, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.84, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.932891201307216, |
| "learning_rate": 3.397296523427806e-08, |
| "logits/chosen": 4.706723213195801, |
| "logits/rejected": 4.706723213195801, |
| "logps/chosen": -26521.8828125, |
| "logps/pi_response": -15752.3818359375, |
| "logps/ref_response": -260.1748046875, |
| "logps/rejected": -26521.884765625, |
| "loss": 0.5218, |
| "rewards/accuracies": 0.04615384712815285, |
| "rewards/chosen": -263.5008544921875, |
| "rewards/margins": 8.876506853994215e-07, |
| "rewards/rejected": -263.5008544921875, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.91, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.7630353244282155, |
| "learning_rate": 1.1026475173977978e-08, |
| "logits/chosen": 4.662195205688477, |
| "logits/rejected": 4.662195205688477, |
| "logps/chosen": -29146.107421875, |
| "logps/pi_response": -18217.185546875, |
| "logps/ref_response": -266.84295654296875, |
| "logps/rejected": -29146.107421875, |
| "loss": 0.5144, |
| "rewards/accuracies": 0.03846153989434242, |
| "rewards/chosen": -289.6330261230469, |
| "rewards/margins": 5.868765029504175e-08, |
| "rewards/rejected": -289.6330261230469, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.97, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 0.7944892762549526, |
| "learning_rate": 5.913435276374834e-10, |
| "logits/chosen": 4.59439754486084, |
| "logits/rejected": 4.59439754486084, |
| "logps/chosen": -28846.330078125, |
| "logps/pi_response": -18351.314453125, |
| "logps/ref_response": -276.857666015625, |
| "logps/rejected": -28846.330078125, |
| "loss": 0.5191, |
| "rewards/accuracies": 0.03846153989434242, |
| "rewards/chosen": -286.6873779296875, |
| "rewards/margins": -8.803147579783399e-07, |
| "rewards/rejected": -286.6873779296875, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.99, |
| "step": 153, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 0.0094, |
| "train_samples_per_second": 2122892.066, |
| "train_steps_per_second": 16240.124 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 153, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|