| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9992429977289932, |
| "eval_steps": 500, |
| "global_step": 165, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 18.46040839297478, |
| "learning_rate": 2.941176470588235e-08, |
| "logits/chosen": -1.823674201965332, |
| "logits/rejected": -1.9654152393341064, |
| "logps/chosen": -228.01856994628906, |
| "logps/pi_response": -374.7322998046875, |
| "logps/ref_response": -374.7322998046875, |
| "logps/rejected": -233.90797424316406, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 17.640747860282204, |
| "learning_rate": 2.941176470588235e-07, |
| "logits/chosen": -1.9038560390472412, |
| "logits/rejected": -1.9730311632156372, |
| "logps/chosen": -247.17388916015625, |
| "logps/pi_response": -390.3918762207031, |
| "logps/ref_response": -389.3831787109375, |
| "logps/rejected": -256.4879150390625, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.4305555522441864, |
| "rewards/chosen": -0.009673516266047955, |
| "rewards/margins": 0.001249339897185564, |
| "rewards/rejected": -0.010922855697572231, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.12, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 28.899289506668094, |
| "learning_rate": 4.994932636402031e-07, |
| "logits/chosen": -1.8548221588134766, |
| "logits/rejected": -1.8832534551620483, |
| "logps/chosen": -251.72506713867188, |
| "logps/pi_response": -406.16729736328125, |
| "logps/ref_response": -385.27423095703125, |
| "logps/rejected": -261.83319091796875, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.17078793048858643, |
| "rewards/margins": 0.0026144247967749834, |
| "rewards/rejected": -0.1734023541212082, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.18, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 42.67715933224017, |
| "learning_rate": 4.905416503522123e-07, |
| "logits/chosen": -1.7043983936309814, |
| "logits/rejected": -1.7824294567108154, |
| "logps/chosen": -270.9886169433594, |
| "logps/pi_response": -429.6463928222656, |
| "logps/ref_response": -377.20526123046875, |
| "logps/rejected": -279.2468566894531, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.38385888934135437, |
| "rewards/margins": 0.053137779235839844, |
| "rewards/rejected": -0.4369966983795166, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.24, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.90865641628214, |
| "learning_rate": 4.707922373336523e-07, |
| "logits/chosen": -2.0103068351745605, |
| "logits/rejected": -2.06811261177063, |
| "logps/chosen": -231.14230346679688, |
| "logps/pi_response": -350.94573974609375, |
| "logps/ref_response": -381.7485656738281, |
| "logps/rejected": -240.6361083984375, |
| "loss": 0.7047, |
| "rewards/accuracies": 0.4593749940395355, |
| "rewards/chosen": 0.17535772919654846, |
| "rewards/margins": -0.011809123679995537, |
| "rewards/rejected": 0.18716685473918915, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.3, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 15.507604404572884, |
| "learning_rate": 4.4113156629677313e-07, |
| "logits/chosen": -2.049783706665039, |
| "logits/rejected": -2.066466808319092, |
| "logps/chosen": -251.8543701171875, |
| "logps/pi_response": -378.17755126953125, |
| "logps/ref_response": -385.29022216796875, |
| "logps/rejected": -263.17840576171875, |
| "loss": 0.6992, |
| "rewards/accuracies": 0.503125011920929, |
| "rewards/chosen": 0.0042174034751951694, |
| "rewards/margins": 0.009917219169437885, |
| "rewards/rejected": -0.005699816159904003, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.36, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 26.8718224209532, |
| "learning_rate": 4.0289109058972283e-07, |
| "logits/chosen": -1.816748857498169, |
| "logits/rejected": -1.7844823598861694, |
| "logps/chosen": -294.067626953125, |
| "logps/pi_response": -430.2914123535156, |
| "logps/ref_response": -382.1808166503906, |
| "logps/rejected": -307.89300537109375, |
| "loss": 0.6871, |
| "rewards/accuracies": 0.534375011920929, |
| "rewards/chosen": -0.48830538988113403, |
| "rewards/margins": 0.05274411290884018, |
| "rewards/rejected": -0.5410495400428772, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.42, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 26.160189012700346, |
| "learning_rate": 3.577874068920446e-07, |
| "logits/chosen": -1.6282529830932617, |
| "logits/rejected": -1.6833488941192627, |
| "logps/chosen": -307.86944580078125, |
| "logps/pi_response": -454.16510009765625, |
| "logps/ref_response": -385.6980895996094, |
| "logps/rejected": -318.7071533203125, |
| "loss": 0.687, |
| "rewards/accuracies": 0.543749988079071, |
| "rewards/chosen": -0.6023891568183899, |
| "rewards/margins": 0.05064737796783447, |
| "rewards/rejected": -0.6530365347862244, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.48, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 29.331288950657562, |
| "learning_rate": 3.078451980100854e-07, |
| "logits/chosen": -1.6993424892425537, |
| "logits/rejected": -1.781658411026001, |
| "logps/chosen": -298.09710693359375, |
| "logps/pi_response": -443.031982421875, |
| "logps/ref_response": -377.09844970703125, |
| "logps/rejected": -306.07000732421875, |
| "loss": 0.687, |
| "rewards/accuracies": 0.5562499761581421, |
| "rewards/chosen": -0.5618255138397217, |
| "rewards/margins": 0.033075593411922455, |
| "rewards/rejected": -0.5949010848999023, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.55, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 19.393381596005668, |
| "learning_rate": 2.553063458334059e-07, |
| "logits/chosen": -1.794803261756897, |
| "logits/rejected": -1.7129312753677368, |
| "logps/chosen": -291.0694274902344, |
| "logps/pi_response": -433.5843811035156, |
| "logps/ref_response": -365.1755676269531, |
| "logps/rejected": -299.36712646484375, |
| "loss": 0.6865, |
| "rewards/accuracies": 0.5531250238418579, |
| "rewards/chosen": -0.5367754697799683, |
| "rewards/margins": 0.03232881426811218, |
| "rewards/rejected": -0.5691043138504028, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.61, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 25.069097284805487, |
| "learning_rate": 2.0252929432814287e-07, |
| "logits/chosen": -1.7748645544052124, |
| "logits/rejected": -1.7401233911514282, |
| "logps/chosen": -311.4417724609375, |
| "logps/pi_response": -464.7859802246094, |
| "logps/ref_response": -385.6819763183594, |
| "logps/rejected": -322.92425537109375, |
| "loss": 0.687, |
| "rewards/accuracies": 0.5093749761581421, |
| "rewards/chosen": -0.6161566972732544, |
| "rewards/margins": 0.018065042793750763, |
| "rewards/rejected": -0.6342216730117798, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.67, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 24.497323695269376, |
| "learning_rate": 1.5188318011445906e-07, |
| "logits/chosen": -1.5538889169692993, |
| "logits/rejected": -1.5146961212158203, |
| "logps/chosen": -327.43988037109375, |
| "logps/pi_response": -494.04071044921875, |
| "logps/ref_response": -391.55242919921875, |
| "logps/rejected": -343.62298583984375, |
| "loss": 0.6812, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.8402616381645203, |
| "rewards/margins": 0.0964951142668724, |
| "rewards/rejected": -0.9367567896842957, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.73, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 26.960393670257023, |
| "learning_rate": 1.0564148305586295e-07, |
| "logits/chosen": -1.55520761013031, |
| "logits/rejected": -1.6926014423370361, |
| "logps/chosen": -302.15289306640625, |
| "logps/pi_response": -478.40008544921875, |
| "logps/ref_response": -395.8247985839844, |
| "logps/rejected": -316.0426940917969, |
| "loss": 0.6831, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.6136020421981812, |
| "rewards/margins": 0.05630939081311226, |
| "rewards/rejected": -0.6699115037918091, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.79, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 19.22478432809335, |
| "learning_rate": 6.587997083462196e-08, |
| "logits/chosen": -1.6059160232543945, |
| "logits/rejected": -1.7904043197631836, |
| "logps/chosen": -298.92108154296875, |
| "logps/pi_response": -464.8466796875, |
| "logps/ref_response": -377.23529052734375, |
| "logps/rejected": -310.23858642578125, |
| "loss": 0.6854, |
| "rewards/accuracies": 0.5531250238418579, |
| "rewards/chosen": -0.5502643585205078, |
| "rewards/margins": 0.04218818619847298, |
| "rewards/rejected": -0.5924525856971741, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.85, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 27.016415362773294, |
| "learning_rate": 3.438351873250492e-08, |
| "logits/chosen": -1.7260879278182983, |
| "logits/rejected": -1.7643539905548096, |
| "logps/chosen": -306.79022216796875, |
| "logps/pi_response": -471.447021484375, |
| "logps/ref_response": -384.4253845214844, |
| "logps/rejected": -320.1635437011719, |
| "loss": 0.6806, |
| "rewards/accuracies": 0.5843750238418579, |
| "rewards/chosen": -0.5997852087020874, |
| "rewards/margins": 0.06555557250976562, |
| "rewards/rejected": -0.665340781211853, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.91, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 21.580864096737645, |
| "learning_rate": 1.256598743236703e-08, |
| "logits/chosen": -1.6270942687988281, |
| "logits/rejected": -1.499762773513794, |
| "logps/chosen": -299.7687072753906, |
| "logps/pi_response": -473.4771423339844, |
| "logps/ref_response": -380.0408020019531, |
| "logps/rejected": -312.3251953125, |
| "loss": 0.6835, |
| "rewards/accuracies": 0.5687500238418579, |
| "rewards/chosen": -0.6862987875938416, |
| "rewards/margins": 0.05176978185772896, |
| "rewards/rejected": -0.7380686402320862, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.97, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 25.201629028850032, |
| "learning_rate": 1.406755487774386e-09, |
| "logits/chosen": -1.6800531148910522, |
| "logits/rejected": -1.5669732093811035, |
| "logps/chosen": -309.3727722167969, |
| "logps/pi_response": -477.26513671875, |
| "logps/ref_response": -381.9959411621094, |
| "logps/rejected": -319.76129150390625, |
| "loss": 0.6837, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.6621077656745911, |
| "rewards/margins": 0.04643635079264641, |
| "rewards/rejected": -0.7085440754890442, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 165, |
| "total_flos": 0.0, |
| "train_loss": 0.6879499728029425, |
| "train_runtime": 34730.0556, |
| "train_samples_per_second": 0.609, |
| "train_steps_per_second": 0.005 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 165, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|