{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9945,
  "eval_steps": 500,
  "global_step": 153,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "eta": 0.0010000000474974513,
      "grad_norm": 8.605272045068777,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.8784992694854736,
      "logits/rejected": -2.8769874572753906,
      "logps/chosen": -263.9749755859375,
      "logps/pi_response": -246.19029235839844,
      "logps/ref_response": -246.19029235839844,
      "logps/rejected": -308.2843322753906,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.07,
      "eta": 0.0010000000474974513,
      "grad_norm": 8.688961504116353,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.936194896697998,
      "logits/rejected": -2.808932304382324,
      "logps/chosen": -315.6687927246094,
      "logps/pi_response": -209.20472717285156,
      "logps/ref_response": -209.1347198486328,
      "logps/rejected": -260.7985534667969,
      "loss": 0.6928,
      "rewards/accuracies": 0.4829059839248657,
      "rewards/chosen": 0.00021380360703915358,
      "rewards/margins": 0.0008379952632822096,
      "rewards/rejected": -0.0006241916562430561,
      "step": 10
    },
    {
      "epoch": 0.13,
      "eta": 0.0010000000474974513,
      "grad_norm": 8.833821950128216,
      "learning_rate": 4.989490450759331e-07,
      "logits/chosen": -2.852677583694458,
      "logits/rejected": -2.8317201137542725,
      "logps/chosen": -274.80267333984375,
      "logps/pi_response": -189.35801696777344,
      "logps/ref_response": -187.89822387695312,
      "logps/rejected": -261.1772766113281,
      "loss": 0.6876,
      "rewards/accuracies": 0.6846153736114502,
      "rewards/chosen": -0.006662360858172178,
      "rewards/margins": 0.012640128843486309,
      "rewards/rejected": -0.0193024892359972,
      "step": 20
    },
    {
      "epoch": 0.2,
      "eta": 0.0010000000474974513,
      "grad_norm": 9.337463091325768,
      "learning_rate": 4.872270441827174e-07,
      "logits/chosen": -2.8088669776916504,
      "logits/rejected": -2.791938304901123,
      "logps/chosen": -275.8798828125,
      "logps/pi_response": -215.20196533203125,
      "logps/ref_response": -213.5146484375,
      "logps/rejected": -255.837890625,
      "loss": 0.6691,
      "rewards/accuracies": 0.7153846025466919,
      "rewards/chosen": 0.021489957347512245,
      "rewards/margins": 0.061311714351177216,
      "rewards/rejected": -0.03982176259160042,
      "step": 30
    },
    {
      "epoch": 0.26,
      "eta": 0.0010000000474974513,
      "grad_norm": 12.278601194362231,
      "learning_rate": 4.6308512113530063e-07,
      "logits/chosen": -2.7728219032287598,
      "logits/rejected": -2.690376043319702,
      "logps/chosen": -278.7479553222656,
      "logps/pi_response": -190.5654296875,
      "logps/ref_response": -177.33053588867188,
      "logps/rejected": -246.11264038085938,
      "loss": 0.6443,
      "rewards/accuracies": 0.6730769276618958,
      "rewards/chosen": -0.05301598832011223,
      "rewards/margins": 0.10135015100240707,
      "rewards/rejected": -0.154366135597229,
      "step": 40
    },
    {
      "epoch": 0.33,
      "eta": 0.0010000000474974513,
      "grad_norm": 15.472310357744927,
      "learning_rate": 4.277872161641681e-07,
      "logits/chosen": -2.792513608932495,
      "logits/rejected": -2.7258100509643555,
      "logps/chosen": -291.08642578125,
      "logps/pi_response": -217.5293426513672,
      "logps/ref_response": -194.07823181152344,
      "logps/rejected": -272.1592102050781,
      "loss": 0.619,
      "rewards/accuracies": 0.6692307591438293,
      "rewards/chosen": -0.05969160422682762,
      "rewards/margins": 0.2206883430480957,
      "rewards/rejected": -0.2803799510002136,
      "step": 50
    },
    {
      "epoch": 0.39,
      "eta": 0.0010000000474974513,
      "grad_norm": 12.018657879137255,
      "learning_rate": 3.8318133624280046e-07,
      "logits/chosen": -2.7461166381835938,
      "logits/rejected": -2.6338207721710205,
      "logps/chosen": -268.39324951171875,
      "logps/pi_response": -218.18861389160156,
      "logps/ref_response": -193.3256072998047,
      "logps/rejected": -277.92572021484375,
      "loss": 0.611,
      "rewards/accuracies": 0.7153846025466919,
      "rewards/chosen": -0.13407574594020844,
      "rewards/margins": 0.21902315318584442,
      "rewards/rejected": -0.35309889912605286,
      "step": 60
    },
    {
      "epoch": 0.46,
      "eta": 0.0010000000474974513,
      "grad_norm": 12.586943156361984,
      "learning_rate": 3.316028034595861e-07,
      "logits/chosen": -2.702092170715332,
      "logits/rejected": -2.648845672607422,
      "logps/chosen": -275.9683532714844,
      "logps/pi_response": -199.25994873046875,
      "logps/ref_response": -183.3825225830078,
      "logps/rejected": -281.8118896484375,
      "loss": 0.6125,
      "rewards/accuracies": 0.6692307591438293,
      "rewards/chosen": -0.07608187198638916,
      "rewards/margins": 0.22089019417762756,
      "rewards/rejected": -0.2969720661640167,
      "step": 70
    },
    {
      "epoch": 0.52,
      "eta": 0.0010000000474974513,
      "grad_norm": 16.37677218967305,
      "learning_rate": 2.7575199021178855e-07,
      "logits/chosen": -2.654991388320923,
      "logits/rejected": -2.581737756729126,
      "logps/chosen": -314.16900634765625,
      "logps/pi_response": -250.20211791992188,
      "logps/ref_response": -203.31488037109375,
      "logps/rejected": -308.69287109375,
      "loss": 0.5967,
      "rewards/accuracies": 0.6730769276618958,
      "rewards/chosen": -0.329426109790802,
      "rewards/margins": 0.27185821533203125,
      "rewards/rejected": -0.6012843251228333,
      "step": 80
    },
    {
      "epoch": 0.58,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.869287847796812,
      "learning_rate": 2.1855294234408068e-07,
      "logits/chosen": -2.4663641452789307,
      "logits/rejected": -2.1920886039733887,
      "logps/chosen": -374.7882385253906,
      "logps/pi_response": -300.48028564453125,
      "logps/ref_response": -229.24087524414062,
      "logps/rejected": -370.0035400390625,
      "loss": 0.573,
      "rewards/accuracies": 0.7153846025466919,
      "rewards/chosen": -0.42087164521217346,
      "rewards/margins": 0.46370625495910645,
      "rewards/rejected": -0.8845779299736023,
      "step": 90
    },
    {
      "epoch": 0.65,
      "eta": 0.0010000000474974513,
      "grad_norm": 22.10929439369714,
      "learning_rate": 1.6300029195778453e-07,
      "logits/chosen": -2.2815823554992676,
      "logits/rejected": -1.9420466423034668,
      "logps/chosen": -328.23388671875,
      "logps/pi_response": -285.6993408203125,
      "logps/ref_response": -202.154541015625,
      "logps/rejected": -346.80718994140625,
      "loss": 0.5648,
      "rewards/accuracies": 0.6653845906257629,
      "rewards/chosen": -0.5251672863960266,
      "rewards/margins": 0.47274622321128845,
      "rewards/rejected": -0.9979135394096375,
      "step": 100
    },
    {
      "epoch": 0.71,
      "eta": 0.0010000000474974513,
      "grad_norm": 20.95321740793465,
      "learning_rate": 1.1200247470632392e-07,
      "logits/chosen": -2.211641311645508,
      "logits/rejected": -1.855459451675415,
      "logps/chosen": -360.8876953125,
      "logps/pi_response": -303.0977783203125,
      "logps/ref_response": -215.0885009765625,
      "logps/rejected": -370.98193359375,
      "loss": 0.563,
      "rewards/accuracies": 0.7423076629638672,
      "rewards/chosen": -0.47680747509002686,
      "rewards/margins": 0.5583351850509644,
      "rewards/rejected": -1.0351426601409912,
      "step": 110
    },
    {
      "epoch": 0.78,
      "eta": 0.0010000000474974513,
      "grad_norm": 21.53917118957897,
      "learning_rate": 6.822945986946385e-08,
      "logits/chosen": -1.8491864204406738,
      "logits/rejected": -1.6956101655960083,
      "logps/chosen": -344.0650939941406,
      "logps/pi_response": -307.6352844238281,
      "logps/ref_response": -204.07801818847656,
      "logps/rejected": -390.5289001464844,
      "loss": 0.5501,
      "rewards/accuracies": 0.6884615421295166,
      "rewards/chosen": -0.6317132711410522,
      "rewards/margins": 0.5379453301429749,
      "rewards/rejected": -1.1696586608886719,
      "step": 120
    },
    {
      "epoch": 0.84,
      "eta": 0.0010000000474974513,
      "grad_norm": 27.579199953433918,
      "learning_rate": 3.397296523427806e-08,
      "logits/chosen": -1.9508247375488281,
      "logits/rejected": -1.6159588098526,
      "logps/chosen": -333.64599609375,
      "logps/pi_response": -301.04547119140625,
      "logps/ref_response": -194.1094207763672,
      "logps/rejected": -385.6200256347656,
      "loss": 0.5332,
      "rewards/accuracies": 0.7038461565971375,
      "rewards/chosen": -0.6621810793876648,
      "rewards/margins": 0.6021292805671692,
      "rewards/rejected": -1.264310359954834,
      "step": 130
    },
    {
      "epoch": 0.91,
      "eta": 0.0010000000474974513,
      "grad_norm": 26.751074987142594,
      "learning_rate": 1.1026475173977978e-08,
      "logits/chosen": -1.9854283332824707,
      "logits/rejected": -1.720418930053711,
      "logps/chosen": -328.14459228515625,
      "logps/pi_response": -306.83367919921875,
      "logps/ref_response": -197.67745971679688,
      "logps/rejected": -386.437255859375,
      "loss": 0.5515,
      "rewards/accuracies": 0.7038461565971375,
      "rewards/chosen": -0.6694343686103821,
      "rewards/margins": 0.5615480542182922,
      "rewards/rejected": -1.2309825420379639,
      "step": 140
    },
    {
      "epoch": 0.97,
      "eta": 0.0010000000474974513,
      "grad_norm": 27.667206944684594,
      "learning_rate": 5.913435276374834e-10,
      "logits/chosen": -1.9676626920700073,
      "logits/rejected": -1.6368684768676758,
      "logps/chosen": -339.4317321777344,
      "logps/pi_response": -303.0911560058594,
      "logps/ref_response": -192.59991455078125,
      "logps/rejected": -375.9079895019531,
      "loss": 0.5599,
      "rewards/accuracies": 0.7192307710647583,
      "rewards/chosen": -0.6414641737937927,
      "rewards/margins": 0.5702866315841675,
      "rewards/rejected": -1.2117507457733154,
      "step": 150
    },
    {
      "epoch": 0.99,
      "step": 153,
      "total_flos": 0.0,
      "train_loss": 0.5998621676482406,
      "train_runtime": 41019.2972,
      "train_samples_per_second": 0.488,
      "train_steps_per_second": 0.004
    }
  ],
  "logging_steps": 10,
  "max_steps": 153,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}