| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 16.074478059343143, |
| "learning_rate": 3.125e-08, |
| "logits/chosen": -1.9564645290374756, |
| "logits/rejected": -2.1290814876556396, |
| "logps/chosen": -144.1077423095703, |
| "logps/pi_response": -268.6929931640625, |
| "logps/ref_response": -268.6929931640625, |
| "logps/rejected": -144.41493225097656, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.06, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 17.576222912928348, |
| "learning_rate": 3.1249999999999997e-07, |
| "logits/chosen": -2.241427183151245, |
| "logits/rejected": -2.282970666885376, |
| "logps/chosen": -171.37808227539062, |
| "logps/pi_response": -273.0738525390625, |
| "logps/ref_response": -271.9916687011719, |
| "logps/rejected": -176.56832885742188, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.3923611044883728, |
| "rewards/chosen": -0.004230719991028309, |
| "rewards/margins": -0.0005770567222498357, |
| "rewards/rejected": -0.0036536632105708122, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.13, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 18.23257699755048, |
| "learning_rate": 4.989935734988097e-07, |
| "logits/chosen": -2.2886428833007812, |
| "logits/rejected": -2.1147801876068115, |
| "logps/chosen": -194.26535034179688, |
| "logps/pi_response": -308.6405029296875, |
| "logps/ref_response": -274.3199157714844, |
| "logps/rejected": -196.698974609375, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.2213359773159027, |
| "rewards/margins": 0.020678246393799782, |
| "rewards/rejected": -0.24201424419879913, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 21.359473410005467, |
| "learning_rate": 4.877641290737883e-07, |
| "logits/chosen": -2.213491916656494, |
| "logits/rejected": -2.1212565898895264, |
| "logps/chosen": -213.91452026367188, |
| "logps/pi_response": -317.0865783691406, |
| "logps/ref_response": -260.5080261230469, |
| "logps/rejected": -215.670166015625, |
| "loss": 0.6897, |
| "rewards/accuracies": 0.543749988079071, |
| "rewards/chosen": -0.35239773988723755, |
| "rewards/margins": 0.03320372849702835, |
| "rewards/rejected": -0.3856014609336853, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.26, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 19.54689711054047, |
| "learning_rate": 4.646121984004665e-07, |
| "logits/chosen": -2.36901593208313, |
| "logits/rejected": -2.241117000579834, |
| "logps/chosen": -191.35202026367188, |
| "logps/pi_response": -293.92608642578125, |
| "logps/ref_response": -255.9798126220703, |
| "logps/rejected": -191.24124145507812, |
| "loss": 0.6947, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": -0.22946178913116455, |
| "rewards/margins": 0.020651038736104965, |
| "rewards/rejected": -0.2501128315925598, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 19.66181931005281, |
| "learning_rate": 4.3069871595684787e-07, |
| "logits/chosen": -2.2629857063293457, |
| "logits/rejected": -2.1153407096862793, |
| "logps/chosen": -225.6036834716797, |
| "logps/pi_response": -330.4422912597656, |
| "logps/ref_response": -266.11285400390625, |
| "logps/rejected": -226.37161254882812, |
| "loss": 0.6836, |
| "rewards/accuracies": 0.49687498807907104, |
| "rewards/chosen": -0.4082844853401184, |
| "rewards/margins": 0.03162597864866257, |
| "rewards/rejected": -0.4399104118347168, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.38, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 34.081390496400246, |
| "learning_rate": 3.877242453630256e-07, |
| "logits/chosen": -2.3039848804473877, |
| "logits/rejected": -2.3428866863250732, |
| "logps/chosen": -220.15634155273438, |
| "logps/pi_response": -319.7514953613281, |
| "logps/ref_response": -254.2370147705078, |
| "logps/rejected": -226.6223907470703, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.3762189447879791, |
| "rewards/margins": 0.03814256191253662, |
| "rewards/rejected": -0.41436153650283813, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.45, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 17.20872152727463, |
| "learning_rate": 3.378437060203357e-07, |
| "logits/chosen": -2.387434959411621, |
| "logits/rejected": -2.2482728958129883, |
| "logps/chosen": -199.58290100097656, |
| "logps/pi_response": -299.43707275390625, |
| "logps/ref_response": -256.967529296875, |
| "logps/rejected": -197.93199157714844, |
| "loss": 0.6856, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.21634867787361145, |
| "rewards/margins": 0.03388797491788864, |
| "rewards/rejected": -0.2502366304397583, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.51, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 18.125911483507668, |
| "learning_rate": 2.8355831645441387e-07, |
| "logits/chosen": -2.2822232246398926, |
| "logits/rejected": -2.355548620223999, |
| "logps/chosen": -211.54409790039062, |
| "logps/pi_response": -338.62335205078125, |
| "logps/ref_response": -268.83172607421875, |
| "logps/rejected": -212.7510223388672, |
| "loss": 0.6795, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": -0.39336004853248596, |
| "rewards/margins": 0.008717315271496773, |
| "rewards/rejected": -0.4020773470401764, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.58, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 18.45235135252255, |
| "learning_rate": 2.2759017277414164e-07, |
| "logits/chosen": -2.3078341484069824, |
| "logits/rejected": -2.3145835399627686, |
| "logps/chosen": -221.66226196289062, |
| "logps/pi_response": -324.65771484375, |
| "logps/ref_response": -253.67257690429688, |
| "logps/rejected": -230.7862091064453, |
| "loss": 0.6837, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.4485122263431549, |
| "rewards/margins": 0.015239333733916283, |
| "rewards/rejected": -0.46375155448913574, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 21.755132830081727, |
| "learning_rate": 1.7274575140626315e-07, |
| "logits/chosen": -2.324589729309082, |
| "logits/rejected": -2.312774181365967, |
| "logps/chosen": -224.4755401611328, |
| "logps/pi_response": -331.3367919921875, |
| "logps/ref_response": -261.8123474121094, |
| "logps/rejected": -226.1329345703125, |
| "loss": 0.6807, |
| "rewards/accuracies": 0.5687500238418579, |
| "rewards/chosen": -0.5234971642494202, |
| "rewards/margins": 0.04355122521519661, |
| "rewards/rejected": -0.5670484304428101, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.7, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 17.643769449739274, |
| "learning_rate": 1.2177518064852348e-07, |
| "logits/chosen": -2.396841526031494, |
| "logits/rejected": -2.2907986640930176, |
| "logps/chosen": -216.90243530273438, |
| "logps/pi_response": -318.94024658203125, |
| "logps/ref_response": -251.3756561279297, |
| "logps/rejected": -215.78512573242188, |
| "loss": 0.6808, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.42508357763290405, |
| "rewards/margins": 0.0596102774143219, |
| "rewards/rejected": -0.48469385504722595, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.77, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 19.96301055359274, |
| "learning_rate": 7.723433775328384e-08, |
| "logits/chosen": -2.3210701942443848, |
| "logits/rejected": -2.387702465057373, |
| "logps/chosen": -209.82119750976562, |
| "logps/pi_response": -329.6842956542969, |
| "logps/ref_response": -276.03692626953125, |
| "logps/rejected": -222.03341674804688, |
| "loss": 0.6769, |
| "rewards/accuracies": 0.5843750238418579, |
| "rewards/chosen": -0.32728347182273865, |
| "rewards/margins": 0.0734453871846199, |
| "rewards/rejected": -0.40072885155677795, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.83, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 20.360241724578835, |
| "learning_rate": 4.1356686569674335e-08, |
| "logits/chosen": -2.3041348457336426, |
| "logits/rejected": -2.2705655097961426, |
| "logps/chosen": -210.88119506835938, |
| "logps/pi_response": -328.033203125, |
| "logps/ref_response": -266.6432189941406, |
| "logps/rejected": -211.0803680419922, |
| "loss": 0.6748, |
| "rewards/accuracies": 0.565625011920929, |
| "rewards/chosen": -0.3793022036552429, |
| "rewards/margins": 0.05234457924962044, |
| "rewards/rejected": -0.43164676427841187, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.9, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 18.16724101735529, |
| "learning_rate": 1.5941282340065697e-08, |
| "logits/chosen": -2.401698350906372, |
| "logits/rejected": -2.386355400085449, |
| "logps/chosen": -201.68978881835938, |
| "logps/pi_response": -315.3774719238281, |
| "logps/ref_response": -254.541259765625, |
| "logps/rejected": -215.79934692382812, |
| "loss": 0.668, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.3424326777458191, |
| "rewards/margins": 0.09304080158472061, |
| "rewards/rejected": -0.4354734420776367, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "eta": 0.0010000000474974513, |
| "grad_norm": 21.61601513002701, |
| "learning_rate": 2.2625595580163247e-09, |
| "logits/chosen": -2.2798948287963867, |
| "logits/rejected": -2.293689489364624, |
| "logps/chosen": -211.88253784179688, |
| "logps/pi_response": -325.4713439941406, |
| "logps/ref_response": -264.48388671875, |
| "logps/rejected": -218.7720489501953, |
| "loss": 0.6717, |
| "rewards/accuracies": 0.528124988079071, |
| "rewards/chosen": -0.3841695487499237, |
| "rewards/margins": 0.059767745435237885, |
| "rewards/rejected": -0.4439373016357422, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 156, |
| "total_flos": 0.0, |
| "train_loss": 0.6820480842620898, |
| "train_runtime": 31897.7284, |
| "train_samples_per_second": 0.627, |
| "train_steps_per_second": 0.005 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|