| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 309, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003236245954692557, |
| "grad_norm": 1360270.551939551, |
| "learning_rate": 1.6129032258064514e-08, |
| "logits/chosen": -2.3401248455047607, |
| "logits/rejected": -2.024223566055298, |
| "logps/chosen": -96.9656982421875, |
| "logps/rejected": -92.04792785644531, |
| "loss": 131017.8359, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.032362459546925564, |
| "grad_norm": 1416555.5626202887, |
| "learning_rate": 1.6129032258064515e-07, |
| "logits/chosen": -2.2171170711517334, |
| "logits/rejected": -2.1732587814331055, |
| "logps/chosen": -99.1213150024414, |
| "logps/rejected": -100.28742218017578, |
| "loss": 128366.8056, |
| "rewards/accuracies": 0.4861111044883728, |
| "rewards/chosen": 8.025332499528304e-05, |
| "rewards/margins": 1.1366943908797111e-05, |
| "rewards/rejected": 6.888638745294884e-05, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.06472491909385113, |
| "grad_norm": 1304570.4126210073, |
| "learning_rate": 3.225806451612903e-07, |
| "logits/chosen": -1.9858334064483643, |
| "logits/rejected": -1.9541418552398682, |
| "logps/chosen": -118.43696594238281, |
| "logps/rejected": -126.21803283691406, |
| "loss": 128692.025, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00015103544865269214, |
| "rewards/margins": 0.0007216802914626896, |
| "rewards/rejected": -0.0008727157255634665, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0970873786407767, |
| "grad_norm": 1685863.1415844476, |
| "learning_rate": 4.838709677419355e-07, |
| "logits/chosen": -2.1170787811279297, |
| "logits/rejected": -2.0741684436798096, |
| "logps/chosen": -122.6366195678711, |
| "logps/rejected": -123.0445556640625, |
| "loss": 127097.55, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.006598073057830334, |
| "rewards/margins": 0.0006057058344595134, |
| "rewards/rejected": -0.00720377778634429, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 2033398.3838454094, |
| "learning_rate": 4.838129496402878e-07, |
| "logits/chosen": -1.9161779880523682, |
| "logits/rejected": -1.9276437759399414, |
| "logps/chosen": -124.73970794677734, |
| "logps/rejected": -142.6216278076172, |
| "loss": 127465.975, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": -0.011538518592715263, |
| "rewards/margins": -6.826107710367069e-05, |
| "rewards/rejected": -0.011470255441963673, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.16181229773462782, |
| "grad_norm": 2147117.1383440965, |
| "learning_rate": 4.6582733812949637e-07, |
| "logits/chosen": -1.8840440511703491, |
| "logits/rejected": -1.912096619606018, |
| "logps/chosen": -114.17850494384766, |
| "logps/rejected": -126.81815338134766, |
| "loss": 125939.7875, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.014376411214470863, |
| "rewards/margins": 0.004386500921100378, |
| "rewards/rejected": -0.01876291260123253, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1941747572815534, |
| "grad_norm": 3071038.1324342038, |
| "learning_rate": 4.4784172661870503e-07, |
| "logits/chosen": -1.9911772012710571, |
| "logits/rejected": -1.9808628559112549, |
| "logps/chosen": -126.23087310791016, |
| "logps/rejected": -129.49313354492188, |
| "loss": 125104.1375, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.01957254856824875, |
| "rewards/margins": 0.00518994452431798, |
| "rewards/rejected": -0.02476249262690544, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.22653721682847897, |
| "grad_norm": 2209031.0455667987, |
| "learning_rate": 4.2985611510791364e-07, |
| "logits/chosen": -2.054396629333496, |
| "logits/rejected": -2.045442581176758, |
| "logps/chosen": -135.28916931152344, |
| "logps/rejected": -161.77029418945312, |
| "loss": 128463.0375, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -0.014407733455300331, |
| "rewards/margins": 0.02120479941368103, |
| "rewards/rejected": -0.03561253100633621, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 2430368.543406948, |
| "learning_rate": 4.118705035971223e-07, |
| "logits/chosen": -2.226501941680908, |
| "logits/rejected": -2.1979050636291504, |
| "logps/chosen": -133.3325958251953, |
| "logps/rejected": -139.7587890625, |
| "loss": 126775.15, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.015011170879006386, |
| "rewards/margins": 0.008515884168446064, |
| "rewards/rejected": -0.023527055978775024, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.2912621359223301, |
| "grad_norm": 2247623.918406063, |
| "learning_rate": 3.938848920863309e-07, |
| "logits/chosen": -2.219113349914551, |
| "logits/rejected": -2.2480850219726562, |
| "logps/chosen": -117.96885681152344, |
| "logps/rejected": -130.67233276367188, |
| "loss": 127866.3875, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.012072126381099224, |
| "rewards/margins": 0.008730234578251839, |
| "rewards/rejected": -0.020802360028028488, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.32362459546925565, |
| "grad_norm": 2316830.1973713143, |
| "learning_rate": 3.7589928057553957e-07, |
| "logits/chosen": -2.1961560249328613, |
| "logits/rejected": -2.1941027641296387, |
| "logps/chosen": -113.81181335449219, |
| "logps/rejected": -124.9216079711914, |
| "loss": 127081.4125, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.013967941515147686, |
| "rewards/margins": 0.011136903427541256, |
| "rewards/rejected": -0.025104844942688942, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.3559870550161812, |
| "grad_norm": 2348041.868872815, |
| "learning_rate": 3.579136690647482e-07, |
| "logits/chosen": -2.1430392265319824, |
| "logits/rejected": -2.132408857345581, |
| "logps/chosen": -131.83375549316406, |
| "logps/rejected": -148.13539123535156, |
| "loss": 128332.6, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.016523033380508423, |
| "rewards/margins": 0.012288080528378487, |
| "rewards/rejected": -0.02881111577153206, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 2751314.8746971595, |
| "learning_rate": 3.3992805755395684e-07, |
| "logits/chosen": -2.0588231086730957, |
| "logits/rejected": -2.0491485595703125, |
| "logps/chosen": -131.24148559570312, |
| "logps/rejected": -131.8287811279297, |
| "loss": 124869.35, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.02066906914114952, |
| "rewards/margins": 0.008971886709332466, |
| "rewards/rejected": -0.029640953987836838, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.42071197411003236, |
| "grad_norm": 2954863.531074605, |
| "learning_rate": 3.2194244604316545e-07, |
| "logits/chosen": -2.176112174987793, |
| "logits/rejected": -2.1641287803649902, |
| "logps/chosen": -114.12776947021484, |
| "logps/rejected": -134.8932342529297, |
| "loss": 125655.875, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.014023621566593647, |
| "rewards/margins": 0.009770934469997883, |
| "rewards/rejected": -0.02379455789923668, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.45307443365695793, |
| "grad_norm": 2721434.057235538, |
| "learning_rate": 3.039568345323741e-07, |
| "logits/chosen": -2.119755983352661, |
| "logits/rejected": -2.1205615997314453, |
| "logps/chosen": -129.28509521484375, |
| "logps/rejected": -148.20298767089844, |
| "loss": 127386.875, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.018853366374969482, |
| "rewards/margins": 0.009598040953278542, |
| "rewards/rejected": -0.028451403602957726, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.4854368932038835, |
| "grad_norm": 2504226.4472642345, |
| "learning_rate": 2.859712230215827e-07, |
| "logits/chosen": -2.11161732673645, |
| "logits/rejected": -2.109983205795288, |
| "logps/chosen": -120.16899108886719, |
| "logps/rejected": -141.44065856933594, |
| "loss": 126322.575, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.01785941980779171, |
| "rewards/margins": 0.012752829119563103, |
| "rewards/rejected": -0.030612248927354813, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.517799352750809, |
| "grad_norm": 2616765.3697665106, |
| "learning_rate": 2.679856115107914e-07, |
| "logits/chosen": -1.996541976928711, |
| "logits/rejected": -1.9952741861343384, |
| "logps/chosen": -129.80105590820312, |
| "logps/rejected": -140.31661987304688, |
| "loss": 124525.2625, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.020018702372908592, |
| "rewards/margins": 0.0044175852090120316, |
| "rewards/rejected": -0.024436287581920624, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.5501618122977346, |
| "grad_norm": 2628992.2929540966, |
| "learning_rate": 2.5e-07, |
| "logits/chosen": -1.9918062686920166, |
| "logits/rejected": -1.9948949813842773, |
| "logps/chosen": -122.13346099853516, |
| "logps/rejected": -133.2384033203125, |
| "loss": 126019.025, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.02550223097205162, |
| "rewards/margins": 0.009997823275625706, |
| "rewards/rejected": -0.03550005704164505, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.5825242718446602, |
| "grad_norm": 3094183.1568820253, |
| "learning_rate": 2.3201438848920862e-07, |
| "logits/chosen": -2.043708086013794, |
| "logits/rejected": -2.081475019454956, |
| "logps/chosen": -140.36175537109375, |
| "logps/rejected": -164.3885498046875, |
| "loss": 126201.7375, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.02569858729839325, |
| "rewards/margins": 0.01428176648914814, |
| "rewards/rejected": -0.03998035565018654, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6148867313915858, |
| "grad_norm": 2334840.857672469, |
| "learning_rate": 2.1402877697841726e-07, |
| "logits/chosen": -2.0947163105010986, |
| "logits/rejected": -2.076787233352661, |
| "logps/chosen": -150.43527221679688, |
| "logps/rejected": -155.5640869140625, |
| "loss": 125959.45, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.02467101439833641, |
| "rewards/margins": 0.007912294939160347, |
| "rewards/rejected": -0.03258330747485161, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.6472491909385113, |
| "grad_norm": 2397922.5168117452, |
| "learning_rate": 1.960431654676259e-07, |
| "logits/chosen": -2.1827738285064697, |
| "logits/rejected": -2.2014429569244385, |
| "logps/chosen": -123.22265625, |
| "logps/rejected": -133.67408752441406, |
| "loss": 127908.025, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.014766250737011433, |
| "rewards/margins": 0.009688087739050388, |
| "rewards/rejected": -0.02445433847606182, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.6796116504854369, |
| "grad_norm": 2303701.8499174784, |
| "learning_rate": 1.7805755395683453e-07, |
| "logits/chosen": -2.1952743530273438, |
| "logits/rejected": -2.1957383155822754, |
| "logps/chosen": -129.71133422851562, |
| "logps/rejected": -145.0044708251953, |
| "loss": 126285.625, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.016956012696027756, |
| "rewards/margins": 0.005377567373216152, |
| "rewards/rejected": -0.022333581000566483, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.7119741100323624, |
| "grad_norm": 2593411.1890004175, |
| "learning_rate": 1.6007194244604316e-07, |
| "logits/chosen": -2.103020429611206, |
| "logits/rejected": -2.0637974739074707, |
| "logps/chosen": -135.33511352539062, |
| "logps/rejected": -136.0529022216797, |
| "loss": 124637.35, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.01790453866124153, |
| "rewards/margins": 0.00530865928158164, |
| "rewards/rejected": -0.02321319654583931, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.7443365695792881, |
| "grad_norm": 2508091.9460668135, |
| "learning_rate": 1.420863309352518e-07, |
| "logits/chosen": -2.093636989593506, |
| "logits/rejected": -2.0820887088775635, |
| "logps/chosen": -132.9607391357422, |
| "logps/rejected": -140.61473083496094, |
| "loss": 125915.3875, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.02344880998134613, |
| "rewards/margins": 0.009993510320782661, |
| "rewards/rejected": -0.03344232216477394, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 2952343.349225862, |
| "learning_rate": 1.2410071942446043e-07, |
| "logits/chosen": -2.1840648651123047, |
| "logits/rejected": -2.139603853225708, |
| "logps/chosen": -138.78213500976562, |
| "logps/rejected": -151.81393432617188, |
| "loss": 124670.15, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.01453552208840847, |
| "rewards/margins": 0.018607601523399353, |
| "rewards/rejected": -0.03314312547445297, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.8090614886731392, |
| "grad_norm": 2784040.2228916725, |
| "learning_rate": 1.0611510791366907e-07, |
| "logits/chosen": -2.088812828063965, |
| "logits/rejected": -2.1028780937194824, |
| "logps/chosen": -119.4928207397461, |
| "logps/rejected": -133.47744750976562, |
| "loss": 125451.8625, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.014928264543414116, |
| "rewards/margins": 0.013005532324314117, |
| "rewards/rejected": -0.027933796867728233, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.8414239482200647, |
| "grad_norm": 2462140.645377509, |
| "learning_rate": 8.812949640287769e-08, |
| "logits/chosen": -2.04371976852417, |
| "logits/rejected": -2.0019214153289795, |
| "logps/chosen": -117.63133239746094, |
| "logps/rejected": -143.88394165039062, |
| "loss": 124118.375, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.013766065239906311, |
| "rewards/margins": 0.020614679902791977, |
| "rewards/rejected": -0.034380748867988586, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.8737864077669902, |
| "grad_norm": 2692483.2231479157, |
| "learning_rate": 7.014388489208632e-08, |
| "logits/chosen": -2.137110710144043, |
| "logits/rejected": -2.142268657684326, |
| "logps/chosen": -127.5798110961914, |
| "logps/rejected": -138.69114685058594, |
| "loss": 124476.1, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.017709199339151382, |
| "rewards/margins": 0.011823633685708046, |
| "rewards/rejected": -0.02953283116221428, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.9061488673139159, |
| "grad_norm": 2810003.763461115, |
| "learning_rate": 5.2158273381294966e-08, |
| "logits/chosen": -2.0531792640686035, |
| "logits/rejected": -2.0200252532958984, |
| "logps/chosen": -141.08401489257812, |
| "logps/rejected": -143.46482849121094, |
| "loss": 125078.825, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.014201803132891655, |
| "rewards/margins": 0.014019514434039593, |
| "rewards/rejected": -0.028221318498253822, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.9385113268608414, |
| "grad_norm": 2782445.5671562827, |
| "learning_rate": 3.41726618705036e-08, |
| "logits/chosen": -2.185438632965088, |
| "logits/rejected": -2.166260004043579, |
| "logps/chosen": -133.27760314941406, |
| "logps/rejected": -140.8465118408203, |
| "loss": 124712.5, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.026157760992646217, |
| "rewards/margins": 0.005216036923229694, |
| "rewards/rejected": -0.03137379512190819, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.970873786407767, |
| "grad_norm": 2793792.227085043, |
| "learning_rate": 1.618705035971223e-08, |
| "logits/chosen": -2.130183696746826, |
| "logits/rejected": -2.1379125118255615, |
| "logps/chosen": -112.70601654052734, |
| "logps/rejected": -126.3117904663086, |
| "loss": 126240.075, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.022287605330348015, |
| "rewards/margins": 0.010505763813853264, |
| "rewards/rejected": -0.03279336914420128, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 309, |
| "total_flos": 0.0, |
| "train_loss": 126137.32107099515, |
| "train_runtime": 4733.3263, |
| "train_samples_per_second": 4.176, |
| "train_steps_per_second": 0.065 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 309, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|