{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9992429977289932,
  "eval_steps": 100,
  "global_step": 165,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 2.9411764705882356e-07,
      "logits/chosen": -2.737081289291382,
      "logits/rejected": -2.680964708328247,
      "logps/chosen": -126.38134765625,
      "logps/rejected": -136.25076293945312,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 2.9411764705882355e-06,
      "logits/chosen": -2.731968879699707,
      "logits/rejected": -2.708989143371582,
      "logps/chosen": -118.21139526367188,
      "logps/rejected": -111.94728088378906,
      "loss": 0.6932,
      "rewards/accuracies": 0.4583333432674408,
      "rewards/chosen": -1.655664891586639e-05,
      "rewards/margins": -7.096579793142155e-05,
      "rewards/rejected": 5.4409170843428e-05,
      "step": 10
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.994932636402032e-06,
      "logits/chosen": -2.744499683380127,
      "logits/rejected": -2.7339892387390137,
      "logps/chosen": -113.14430236816406,
      "logps/rejected": -123.68851470947266,
      "loss": 0.6929,
      "rewards/accuracies": 0.5218750238418579,
      "rewards/chosen": 0.007225497625768185,
      "rewards/margins": 0.0004294503596611321,
      "rewards/rejected": 0.0067960480228066444,
      "step": 20
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.905416503522124e-06,
      "logits/chosen": -2.772244930267334,
      "logits/rejected": -2.7886576652526855,
      "logps/chosen": -115.32562255859375,
      "logps/rejected": -122.47587585449219,
      "loss": 0.6925,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": 0.024915488436818123,
      "rewards/margins": 0.00023288575175683945,
      "rewards/rejected": 0.024682600051164627,
      "step": 30
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.707922373336524e-06,
      "logits/chosen": -2.740614175796509,
      "logits/rejected": -2.727870464324951,
      "logps/chosen": -111.22251892089844,
      "logps/rejected": -113.79164123535156,
      "loss": 0.691,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": 0.052265096455812454,
      "rewards/margins": 0.003552838694304228,
      "rewards/rejected": 0.04871225729584694,
      "step": 40
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.411315662967732e-06,
      "logits/chosen": -2.775801181793213,
      "logits/rejected": -2.7031962871551514,
      "logps/chosen": -109.81254577636719,
      "logps/rejected": -113.3335952758789,
      "loss": 0.6897,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0849301889538765,
      "rewards/margins": 0.006556935608386993,
      "rewards/rejected": 0.0783732533454895,
      "step": 50
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.028910905897229e-06,
      "logits/chosen": -2.756106376647949,
      "logits/rejected": -2.7039153575897217,
      "logps/chosen": -110.66825103759766,
      "logps/rejected": -110.25054931640625,
      "loss": 0.689,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": 0.09365645796060562,
      "rewards/margins": 0.008471069857478142,
      "rewards/rejected": 0.08518538624048233,
      "step": 60
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.577874068920446e-06,
      "logits/chosen": -2.7151947021484375,
      "logits/rejected": -2.706727981567383,
      "logps/chosen": -109.93450927734375,
      "logps/rejected": -113.0154800415039,
      "loss": 0.6885,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.07744868099689484,
      "rewards/margins": 0.004513105843216181,
      "rewards/rejected": 0.0729355737566948,
      "step": 70
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.0784519801008546e-06,
      "logits/chosen": -2.692235231399536,
      "logits/rejected": -2.6538617610931396,
      "logps/chosen": -111.87430572509766,
      "logps/rejected": -115.57984924316406,
      "loss": 0.6867,
      "rewards/accuracies": 0.596875011920929,
      "rewards/chosen": 0.07563529908657074,
      "rewards/margins": 0.015832457691431046,
      "rewards/rejected": 0.059802841395139694,
      "step": 80
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.553063458334059e-06,
      "logits/chosen": -2.738804340362549,
      "logits/rejected": -2.690701723098755,
      "logps/chosen": -120.409423828125,
      "logps/rejected": -118.25407409667969,
      "loss": 0.6841,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.05362895876169205,
      "rewards/margins": 0.01961613819003105,
      "rewards/rejected": 0.034012824296951294,
      "step": 90
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.025292943281429e-06,
      "logits/chosen": -2.718660593032837,
      "logits/rejected": -2.7189323902130127,
      "logps/chosen": -108.1891098022461,
      "logps/rejected": -112.934814453125,
      "loss": 0.6837,
      "rewards/accuracies": 0.590624988079071,
      "rewards/chosen": 0.06428461521863937,
      "rewards/margins": 0.0179769154638052,
      "rewards/rejected": 0.04630769044160843,
      "step": 100
    },
    {
      "epoch": 0.61,
      "eval_logits/chosen": -2.7115368843078613,
      "eval_logits/rejected": -2.6278061866760254,
      "eval_logps/chosen": -286.1498718261719,
      "eval_logps/rejected": -263.6365966796875,
      "eval_loss": 0.6740216016769409,
      "eval_rewards/accuracies": 0.6359999775886536,
      "eval_rewards/chosen": 0.008544988930225372,
      "eval_rewards/margins": 0.03750109300017357,
      "eval_rewards/rejected": -0.028956104069948196,
      "eval_runtime": 383.9881,
      "eval_samples_per_second": 5.208,
      "eval_steps_per_second": 0.651,
      "step": 100
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5188318011445907e-06,
      "logits/chosen": -2.7399725914001465,
      "logits/rejected": -2.710850954055786,
      "logps/chosen": -123.49687194824219,
      "logps/rejected": -121.06239318847656,
      "loss": 0.6835,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.031930722296237946,
      "rewards/margins": 0.025511348620057106,
      "rewards/rejected": 0.00641937181353569,
      "step": 110
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0564148305586296e-06,
      "logits/chosen": -2.733646869659424,
      "logits/rejected": -2.7130093574523926,
      "logps/chosen": -116.14371490478516,
      "logps/rejected": -121.31622314453125,
      "loss": 0.6835,
      "rewards/accuracies": 0.559374988079071,
      "rewards/chosen": 0.04128889739513397,
      "rewards/margins": 0.021482665091753006,
      "rewards/rejected": 0.019806232303380966,
      "step": 120
    },
    {
      "epoch": 0.79,
      "learning_rate": 6.587997083462197e-07,
      "logits/chosen": -2.706106424331665,
      "logits/rejected": -2.683042049407959,
      "logps/chosen": -121.27201080322266,
      "logps/rejected": -124.78038024902344,
      "loss": 0.6784,
      "rewards/accuracies": 0.621874988079071,
      "rewards/chosen": 0.032023753970861435,
      "rewards/margins": 0.03133785352110863,
      "rewards/rejected": 0.0006858977722004056,
      "step": 130
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.438351873250492e-07,
      "logits/chosen": -2.6774215698242188,
      "logits/rejected": -2.66115140914917,
      "logps/chosen": -111.2270736694336,
      "logps/rejected": -116.47705078125,
      "loss": 0.6774,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.032939545810222626,
      "rewards/margins": 0.04166686534881592,
      "rewards/rejected": -0.008727315813302994,
      "step": 140
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2565987432367032e-07,
      "logits/chosen": -2.7032761573791504,
      "logits/rejected": -2.6723108291625977,
      "logps/chosen": -109.5132064819336,
      "logps/rejected": -114.572509765625,
      "loss": 0.6802,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": 0.037231095135211945,
      "rewards/margins": 0.029751187190413475,
      "rewards/rejected": 0.007479907013475895,
      "step": 150
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.4067554877743861e-08,
      "logits/chosen": -2.6688144207000732,
      "logits/rejected": -2.6238021850585938,
      "logps/chosen": -110.0870361328125,
      "logps/rejected": -108.6930160522461,
      "loss": 0.68,
      "rewards/accuracies": 0.5843750238418579,
      "rewards/chosen": 0.050410233438014984,
      "rewards/margins": 0.03611644357442856,
      "rewards/rejected": 0.014293788000941277,
      "step": 160
    },
    {
      "epoch": 1.0,
      "step": 165,
      "total_flos": 0.0,
      "train_loss": 0.685761218359976,
      "train_runtime": 6660.773,
      "train_samples_per_second": 3.173,
      "train_steps_per_second": 0.025
    }
  ],
  "logging_steps": 10,
  "max_steps": 165,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}