| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9997303855486654, | |
| "eval_steps": 500, | |
| "global_step": 1854, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.050148287948234026, | |
| "grad_norm": 3.929126501083374, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -0.5141991972923279, | |
| "logits/rejected": -0.47028154134750366, | |
| "logps/chosen": -69.07454681396484, | |
| "logps/rejected": -14.869856834411621, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.48521506786346436, | |
| "rewards/chosen": -1.161982163466746e-05, | |
| "rewards/margins": 0.0009046773775480688, | |
| "rewards/rejected": -0.0009162970818579197, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.10029657589646805, | |
| "grad_norm": 7.2451324462890625, | |
| "learning_rate": 1e-06, | |
| "logits/chosen": -0.5093461871147156, | |
| "logits/rejected": -0.45741066336631775, | |
| "logps/chosen": -76.25039672851562, | |
| "logps/rejected": -15.682782173156738, | |
| "loss": 0.6682, | |
| "rewards/accuracies": 0.7553763389587402, | |
| "rewards/chosen": 0.043871019035577774, | |
| "rewards/margins": 0.052321143448352814, | |
| "rewards/rejected": -0.00845012441277504, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.15044486384470207, | |
| "grad_norm": 3.729768991470337, | |
| "learning_rate": 9.442446043165467e-07, | |
| "logits/chosen": -0.49496081471443176, | |
| "logits/rejected": -0.4462580382823944, | |
| "logps/chosen": -73.93294525146484, | |
| "logps/rejected": -17.590185165405273, | |
| "loss": 0.5259, | |
| "rewards/accuracies": 0.8602150678634644, | |
| "rewards/chosen": 0.3595849275588989, | |
| "rewards/margins": 0.43593457341194153, | |
| "rewards/rejected": -0.07634969055652618, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.2005931517929361, | |
| "grad_norm": 2.1467108726501465, | |
| "learning_rate": 8.884892086330935e-07, | |
| "logits/chosen": -0.5146396160125732, | |
| "logits/rejected": -0.4725695848464966, | |
| "logps/chosen": -67.33499145507812, | |
| "logps/rejected": -21.152362823486328, | |
| "loss": 0.3566, | |
| "rewards/accuracies": 0.9408602118492126, | |
| "rewards/chosen": 0.8917596340179443, | |
| "rewards/margins": 1.2377275228500366, | |
| "rewards/rejected": -0.3459678590297699, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.25074143974117014, | |
| "grad_norm": 2.8724427223205566, | |
| "learning_rate": 8.327338129496403e-07, | |
| "logits/chosen": -0.5053039193153381, | |
| "logits/rejected": -0.46124857664108276, | |
| "logps/chosen": -60.683773040771484, | |
| "logps/rejected": -23.27366828918457, | |
| "loss": 0.2328, | |
| "rewards/accuracies": 0.9731183052062988, | |
| "rewards/chosen": 1.1787759065628052, | |
| "rewards/margins": 1.961389183998108, | |
| "rewards/rejected": -0.7826132774353027, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.30088972768940414, | |
| "grad_norm": 4.906219959259033, | |
| "learning_rate": 7.769784172661871e-07, | |
| "logits/chosen": -0.5105525851249695, | |
| "logits/rejected": -0.46868896484375, | |
| "logps/chosen": -57.557701110839844, | |
| "logps/rejected": -29.869394302368164, | |
| "loss": 0.1128, | |
| "rewards/accuracies": 0.9798387289047241, | |
| "rewards/chosen": 1.250539779663086, | |
| "rewards/margins": 2.861812114715576, | |
| "rewards/rejected": -1.6112723350524902, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.3510380156376382, | |
| "grad_norm": 0.3975902497768402, | |
| "learning_rate": 7.212230215827337e-07, | |
| "logits/chosen": -0.51771479845047, | |
| "logits/rejected": -0.46667957305908203, | |
| "logps/chosen": -61.55564880371094, | |
| "logps/rejected": -39.249454498291016, | |
| "loss": 0.0582, | |
| "rewards/accuracies": 0.9865591526031494, | |
| "rewards/chosen": 1.4144244194030762, | |
| "rewards/margins": 3.826444149017334, | |
| "rewards/rejected": -2.412019729614258, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.4011863035858722, | |
| "grad_norm": 2.5660245418548584, | |
| "learning_rate": 6.654676258992805e-07, | |
| "logits/chosen": -0.46265119314193726, | |
| "logits/rejected": -0.426276832818985, | |
| "logps/chosen": -56.20552062988281, | |
| "logps/rejected": -43.43349838256836, | |
| "loss": 0.0577, | |
| "rewards/accuracies": 0.9811828136444092, | |
| "rewards/chosen": 1.415700912475586, | |
| "rewards/margins": 4.192880153656006, | |
| "rewards/rejected": -2.77717924118042, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.4513345915341062, | |
| "grad_norm": 0.4916980564594269, | |
| "learning_rate": 6.097122302158273e-07, | |
| "logits/chosen": -0.5033692121505737, | |
| "logits/rejected": -0.4418078064918518, | |
| "logps/chosen": -59.61499786376953, | |
| "logps/rejected": -45.10437774658203, | |
| "loss": 0.0413, | |
| "rewards/accuracies": 0.9852150678634644, | |
| "rewards/chosen": 1.577943205833435, | |
| "rewards/margins": 4.589221477508545, | |
| "rewards/rejected": -3.0112783908843994, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5014828794823403, | |
| "grad_norm": 0.3903834819793701, | |
| "learning_rate": 5.539568345323741e-07, | |
| "logits/chosen": -0.44615066051483154, | |
| "logits/rejected": -0.3858674168586731, | |
| "logps/chosen": -59.962955474853516, | |
| "logps/rejected": -47.591575622558594, | |
| "loss": 0.0306, | |
| "rewards/accuracies": 0.9879032373428345, | |
| "rewards/chosen": 1.6375161409378052, | |
| "rewards/margins": 4.831850528717041, | |
| "rewards/rejected": -3.1943342685699463, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.5516311674305743, | |
| "grad_norm": 0.27728018164634705, | |
| "learning_rate": 4.982014388489209e-07, | |
| "logits/chosen": -0.42113569378852844, | |
| "logits/rejected": -0.37468260526657104, | |
| "logps/chosen": -58.571006774902344, | |
| "logps/rejected": -50.136695861816406, | |
| "loss": 0.0334, | |
| "rewards/accuracies": 0.9865591526031494, | |
| "rewards/chosen": 1.5126001834869385, | |
| "rewards/margins": 4.904977798461914, | |
| "rewards/rejected": -3.3923778533935547, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.6017794553788083, | |
| "grad_norm": 0.25181448459625244, | |
| "learning_rate": 4.424460431654676e-07, | |
| "logits/chosen": -0.41716840863227844, | |
| "logits/rejected": -0.35924020409584045, | |
| "logps/chosen": -56.86894226074219, | |
| "logps/rejected": -51.47317123413086, | |
| "loss": 0.0338, | |
| "rewards/accuracies": 0.9838709831237793, | |
| "rewards/chosen": 1.5967961549758911, | |
| "rewards/margins": 5.1519880294799805, | |
| "rewards/rejected": -3.5551917552948, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.6519277433270423, | |
| "grad_norm": 20.254880905151367, | |
| "learning_rate": 3.8669064748201436e-07, | |
| "logits/chosen": -0.4112766981124878, | |
| "logits/rejected": -0.33467066287994385, | |
| "logps/chosen": -59.09377670288086, | |
| "logps/rejected": -51.40492630004883, | |
| "loss": 0.0345, | |
| "rewards/accuracies": 0.9879032373428345, | |
| "rewards/chosen": 1.6269216537475586, | |
| "rewards/margins": 5.300109386444092, | |
| "rewards/rejected": -3.6731879711151123, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.7020760312752764, | |
| "grad_norm": 0.0816323384642601, | |
| "learning_rate": 3.309352517985611e-07, | |
| "logits/chosen": -0.4078998267650604, | |
| "logits/rejected": -0.3409072160720825, | |
| "logps/chosen": -54.6790771484375, | |
| "logps/rejected": -53.65495300292969, | |
| "loss": 0.0335, | |
| "rewards/accuracies": 0.9865591526031494, | |
| "rewards/chosen": 1.5546735525131226, | |
| "rewards/margins": 5.438999652862549, | |
| "rewards/rejected": -3.8843259811401367, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.7522243192235104, | |
| "grad_norm": 10.449593544006348, | |
| "learning_rate": 2.751798561151079e-07, | |
| "logits/chosen": -0.41149967908859253, | |
| "logits/rejected": -0.3351740837097168, | |
| "logps/chosen": -58.309329986572266, | |
| "logps/rejected": -52.98766326904297, | |
| "loss": 0.0298, | |
| "rewards/accuracies": 0.9879032373428345, | |
| "rewards/chosen": 1.7052394151687622, | |
| "rewards/margins": 5.557784557342529, | |
| "rewards/rejected": -3.8525450229644775, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.8023726071717444, | |
| "grad_norm": 1.828351378440857, | |
| "learning_rate": 2.1942446043165465e-07, | |
| "logits/chosen": -0.41116863489151, | |
| "logits/rejected": -0.3420298993587494, | |
| "logps/chosen": -57.310306549072266, | |
| "logps/rejected": -56.159385681152344, | |
| "loss": 0.0199, | |
| "rewards/accuracies": 0.9932795763015747, | |
| "rewards/chosen": 1.6173115968704224, | |
| "rewards/margins": 5.636691093444824, | |
| "rewards/rejected": -4.019379615783691, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 0.8525208951199784, | |
| "grad_norm": 0.3853701651096344, | |
| "learning_rate": 1.6366906474820144e-07, | |
| "logits/chosen": -0.386165589094162, | |
| "logits/rejected": -0.314485102891922, | |
| "logps/chosen": -59.73184585571289, | |
| "logps/rejected": -56.95379638671875, | |
| "loss": 0.0239, | |
| "rewards/accuracies": 0.9905914068222046, | |
| "rewards/chosen": 1.7348586320877075, | |
| "rewards/margins": 5.7297468185424805, | |
| "rewards/rejected": -3.9948880672454834, | |
| "step": 1581 | |
| }, | |
| { | |
| "epoch": 0.9026691830682124, | |
| "grad_norm": 0.15693414211273193, | |
| "learning_rate": 1.0791366906474819e-07, | |
| "logits/chosen": -0.4054364860057831, | |
| "logits/rejected": -0.33498746156692505, | |
| "logps/chosen": -59.102989196777344, | |
| "logps/rejected": -57.07588195800781, | |
| "loss": 0.0154, | |
| "rewards/accuracies": 0.9946236610412598, | |
| "rewards/chosen": 1.6817787885665894, | |
| "rewards/margins": 5.79107141494751, | |
| "rewards/rejected": -4.109292984008789, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 0.9528174710164464, | |
| "grad_norm": 0.12521210312843323, | |
| "learning_rate": 5.2158273381294966e-08, | |
| "logits/chosen": -0.40175333619117737, | |
| "logits/rejected": -0.32109692692756653, | |
| "logps/chosen": -56.866294860839844, | |
| "logps/rejected": -57.35883331298828, | |
| "loss": 0.0239, | |
| "rewards/accuracies": 0.9905914068222046, | |
| "rewards/chosen": 1.6757546663284302, | |
| "rewards/margins": 5.896215438842773, | |
| "rewards/rejected": -4.220460414886475, | |
| "step": 1767 | |
| }, | |
| { | |
| "epoch": 0.9997303855486654, | |
| "step": 1854, | |
| "total_flos": 1.1904720283191214e+18, | |
| "train_loss": 0.15248539275464632, | |
| "train_runtime": 30932.1025, | |
| "train_samples_per_second": 0.48, | |
| "train_steps_per_second": 0.06 | |
| } | |
| ], | |
| "logging_steps": 93, | |
| "max_steps": 1854, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.1904720283191214e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |