{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999219055056618,
  "eval_steps": 500,
  "global_step": 3201,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05029285435376806,
      "grad_norm": 3.709867000579834,
      "learning_rate": 5.015576323987538e-07,
      "logits/chosen": -0.5341636538505554,
      "logits/rejected": -0.4424578845500946,
      "logps/chosen": -72.9678726196289,
      "logps/rejected": -9.562939643859863,
      "loss": 0.6897,
      "rewards/accuracies": 0.5854037404060364,
      "rewards/chosen": 0.006333178840577602,
      "rewards/margins": 0.007243483327329159,
      "rewards/rejected": -0.0009103047195822,
      "step": 161
    },
    {
      "epoch": 0.10058570870753612,
      "grad_norm": 4.478637218475342,
      "learning_rate": 9.996527777777777e-07,
      "logits/chosen": -0.5466479063034058,
      "logits/rejected": -0.4609982669353485,
      "logps/chosen": -67.95721435546875,
      "logps/rejected": -10.801457405090332,
      "loss": 0.5858,
      "rewards/accuracies": 0.9448757767677307,
      "rewards/chosen": 0.21187880635261536,
      "rewards/margins": 0.2563822567462921,
      "rewards/rejected": -0.04450342431664467,
      "step": 322
    },
    {
      "epoch": 0.15087856306130418,
      "grad_norm": 3.006552219390869,
      "learning_rate": 9.4375e-07,
      "logits/chosen": -0.5257240533828735,
      "logits/rejected": -0.4737217128276825,
      "logps/chosen": -55.05992126464844,
      "logps/rejected": -17.820192337036133,
      "loss": 0.2339,
      "rewards/accuracies": 0.9798136949539185,
      "rewards/chosen": 1.0976394414901733,
      "rewards/margins": 1.828528642654419,
      "rewards/rejected": -0.7308891415596008,
      "step": 483
    },
    {
      "epoch": 0.20117141741507225,
      "grad_norm": 3.329214572906494,
      "learning_rate": 8.878472222222221e-07,
      "logits/chosen": -0.5365005135536194,
      "logits/rejected": -0.4758988916873932,
      "logps/chosen": -56.26280975341797,
      "logps/rejected": -29.085416793823242,
      "loss": 0.0705,
      "rewards/accuracies": 0.9852484464645386,
      "rewards/chosen": 1.5725164413452148,
      "rewards/margins": 3.528273344039917,
      "rewards/rejected": -1.9557571411132812,
      "step": 644
    },
    {
      "epoch": 0.2514642717688403,
      "grad_norm": 0.14665871858596802,
      "learning_rate": 8.319444444444444e-07,
      "logits/chosen": -0.48797789216041565,
      "logits/rejected": -0.402078241109848,
      "logps/chosen": -51.26579666137695,
      "logps/rejected": -40.8376350402832,
      "loss": 0.0447,
      "rewards/accuracies": 0.9790372848510742,
      "rewards/chosen": 1.6765810251235962,
      "rewards/margins": 4.7392683029174805,
      "rewards/rejected": -3.062687397003174,
      "step": 805
    },
    {
      "epoch": 0.30175712612260835,
      "grad_norm": 0.1438707709312439,
      "learning_rate": 7.760416666666666e-07,
      "logits/chosen": -0.48938173055648804,
      "logits/rejected": -0.3820492923259735,
      "logps/chosen": -50.945064544677734,
      "logps/rejected": -47.827144622802734,
      "loss": 0.0257,
      "rewards/accuracies": 0.989130437374115,
      "rewards/chosen": 1.6951720714569092,
      "rewards/margins": 5.385878562927246,
      "rewards/rejected": -3.6907060146331787,
      "step": 966
    },
    {
      "epoch": 0.3520499804763764,
      "grad_norm": 2.6210033893585205,
      "learning_rate": 7.201388888888889e-07,
      "logits/chosen": -0.47775155305862427,
      "logits/rejected": -0.3484514653682709,
      "logps/chosen": -51.115570068359375,
      "logps/rejected": -48.93259811401367,
      "loss": 0.0334,
      "rewards/accuracies": 0.9852484464645386,
      "rewards/chosen": 1.7170895338058472,
      "rewards/margins": 5.645313262939453,
      "rewards/rejected": -3.928223133087158,
      "step": 1127
    },
    {
      "epoch": 0.4023428348301445,
      "grad_norm": 4.137578964233398,
      "learning_rate": 6.642361111111111e-07,
      "logits/chosen": -0.418182373046875,
      "logits/rejected": -0.27966105937957764,
      "logps/chosen": -50.84815979003906,
      "logps/rejected": -52.41380310058594,
      "loss": 0.031,
      "rewards/accuracies": 0.9836956858634949,
      "rewards/chosen": 1.7334542274475098,
      "rewards/margins": 5.92018985748291,
      "rewards/rejected": -4.1867356300354,
      "step": 1288
    },
    {
      "epoch": 0.45263568918391256,
      "grad_norm": 0.07434514909982681,
      "learning_rate": 6.083333333333333e-07,
      "logits/chosen": -0.4359574019908905,
      "logits/rejected": -0.27854442596435547,
      "logps/chosen": -52.3484992980957,
      "logps/rejected": -55.29338073730469,
      "loss": 0.0264,
      "rewards/accuracies": 0.9883540272712708,
      "rewards/chosen": 1.8382827043533325,
      "rewards/margins": 6.365357398986816,
      "rewards/rejected": -4.527073860168457,
      "step": 1449
    },
    {
      "epoch": 0.5029285435376806,
      "grad_norm": 0.17919230461120605,
      "learning_rate": 5.524305555555555e-07,
      "logits/chosen": -0.4308469891548157,
      "logits/rejected": -0.27532947063446045,
      "logps/chosen": -52.922943115234375,
      "logps/rejected": -57.74534225463867,
      "loss": 0.0238,
      "rewards/accuracies": 0.9860248565673828,
      "rewards/chosen": 1.7562286853790283,
      "rewards/margins": 6.457979679107666,
      "rewards/rejected": -4.701751708984375,
      "step": 1610
    },
    {
      "epoch": 0.5532213978914486,
      "grad_norm": 0.026971790939569473,
      "learning_rate": 4.965277777777777e-07,
      "logits/chosen": -0.3874114155769348,
      "logits/rejected": -0.2123931497335434,
      "logps/chosen": -54.096187591552734,
      "logps/rejected": -60.510658264160156,
      "loss": 0.0233,
      "rewards/accuracies": 0.9860248565673828,
      "rewards/chosen": 1.8518177270889282,
      "rewards/margins": 6.871143341064453,
      "rewards/rejected": -5.019325256347656,
      "step": 1771
    },
    {
      "epoch": 0.6035142522452167,
      "grad_norm": 0.09704186022281647,
      "learning_rate": 4.4062499999999996e-07,
      "logits/chosen": -0.37683001160621643,
      "logits/rejected": -0.19427433609962463,
      "logps/chosen": -54.42588424682617,
      "logps/rejected": -61.70278549194336,
      "loss": 0.0221,
      "rewards/accuracies": 0.986801266670227,
      "rewards/chosen": 1.8429250717163086,
      "rewards/margins": 7.024946212768555,
      "rewards/rejected": -5.182021141052246,
      "step": 1932
    },
    {
      "epoch": 0.6538071065989848,
      "grad_norm": 0.023908786475658417,
      "learning_rate": 3.8472222222222225e-07,
      "logits/chosen": -0.4016348421573639,
      "logits/rejected": -0.23176224529743195,
      "logps/chosen": -51.2470588684082,
      "logps/rejected": -64.10649871826172,
      "loss": 0.0229,
      "rewards/accuracies": 0.9883540272712708,
      "rewards/chosen": 1.6802482604980469,
      "rewards/margins": 7.057994365692139,
      "rewards/rejected": -5.377746105194092,
      "step": 2093
    },
    {
      "epoch": 0.7040999609527528,
      "grad_norm": 33.38786697387695,
      "learning_rate": 3.2881944444444443e-07,
      "logits/chosen": -0.38065940141677856,
      "logits/rejected": -0.19005167484283447,
      "logps/chosen": -52.53501892089844,
      "logps/rejected": -65.53170013427734,
      "loss": 0.0286,
      "rewards/accuracies": 0.986801266670227,
      "rewards/chosen": 1.750819444656372,
      "rewards/margins": 7.3389058113098145,
      "rewards/rejected": -5.5880866050720215,
      "step": 2254
    },
    {
      "epoch": 0.7543928153065209,
      "grad_norm": 33.96625518798828,
      "learning_rate": 2.729166666666666e-07,
      "logits/chosen": -0.36966800689697266,
      "logits/rejected": -0.18849784135818481,
      "logps/chosen": -51.86404800415039,
      "logps/rejected": -67.312255859375,
      "loss": 0.0249,
      "rewards/accuracies": 0.9883540272712708,
      "rewards/chosen": 1.648945927619934,
      "rewards/margins": 7.407442092895508,
      "rewards/rejected": -5.758496284484863,
      "step": 2415
    },
    {
      "epoch": 0.804685669660289,
      "grad_norm": 0.029783952981233597,
      "learning_rate": 2.1701388888888887e-07,
      "logits/chosen": -0.3718484044075012,
      "logits/rejected": -0.18486632406711578,
      "logps/chosen": -54.5934944152832,
      "logps/rejected": -68.00869750976562,
      "loss": 0.0317,
      "rewards/accuracies": 0.9852484464645386,
      "rewards/chosen": 1.6592566967010498,
      "rewards/margins": 7.394908905029297,
      "rewards/rejected": -5.735651969909668,
      "step": 2576
    },
    {
      "epoch": 0.854978524014057,
      "grad_norm": 0.015878599137067795,
      "learning_rate": 1.611111111111111e-07,
      "logits/chosen": -0.37658101320266724,
      "logits/rejected": -0.2032197117805481,
      "logps/chosen": -51.02374267578125,
      "logps/rejected": -68.18423461914062,
      "loss": 0.0295,
      "rewards/accuracies": 0.9852484464645386,
      "rewards/chosen": 1.6066235303878784,
      "rewards/margins": 7.458284854888916,
      "rewards/rejected": -5.851661205291748,
      "step": 2737
    },
    {
      "epoch": 0.9052713783678251,
      "grad_norm": 0.03441372141242027,
      "learning_rate": 1.0520833333333333e-07,
      "logits/chosen": -0.3815793991088867,
      "logits/rejected": -0.1944185197353363,
      "logps/chosen": -51.63225555419922,
      "logps/rejected": -68.67254638671875,
      "loss": 0.0241,
      "rewards/accuracies": 0.989130437374115,
      "rewards/chosen": 1.6394439935684204,
      "rewards/margins": 7.522336959838867,
      "rewards/rejected": -5.882892608642578,
      "step": 2898
    },
    {
      "epoch": 0.9555642327215931,
      "grad_norm": 0.028507934883236885,
      "learning_rate": 4.9305555555555555e-08,
      "logits/chosen": -0.35245007276535034,
      "logits/rejected": -0.17326129972934723,
      "logps/chosen": -54.05128479003906,
      "logps/rejected": -69.85031127929688,
      "loss": 0.0227,
      "rewards/accuracies": 0.9906832575798035,
      "rewards/chosen": 1.6639856100082397,
      "rewards/margins": 7.611756801605225,
      "rewards/rejected": -5.947770595550537,
      "step": 3059
    },
    {
      "epoch": 0.9999219055056618,
      "step": 3201,
      "total_flos": 1.8190117587218596e+18,
      "train_loss": 0.10125412571545654,
      "train_runtime": 44496.0824,
      "train_samples_per_second": 0.576,
      "train_steps_per_second": 0.072
    }
  ],
  "logging_steps": 161,
  "max_steps": 3201,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.8190117587218596e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}