{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984,
  "eval_steps": 500,
  "global_step": 156,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.064,
      "grad_norm": 48.569252014160156,
      "learning_rate": 6.249999999999999e-07,
      "logits/chosen": -1.8510873317718506,
      "logits/rejected": -0.29376277327537537,
      "logps/chosen": -214.10960388183594,
      "logps/rejected": -737.373291015625,
      "loss": 0.6961,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.006722441408783197,
      "rewards/margins": 0.026040678843855858,
      "rewards/rejected": -0.019318239763379097,
      "step": 10
    },
    {
      "epoch": 0.128,
      "grad_norm": 14.449111938476562,
      "learning_rate": 9.979871469976195e-07,
      "logits/chosen": -1.8497416973114014,
      "logits/rejected": -0.2751065790653229,
      "logps/chosen": -240.32241821289062,
      "logps/rejected": -844.6730346679688,
      "loss": 0.4739,
      "rewards/accuracies": 0.9468749761581421,
      "rewards/chosen": 0.029739724472165108,
      "rewards/margins": 0.6849702596664429,
      "rewards/rejected": -0.6552305817604065,
      "step": 20
    },
    {
      "epoch": 0.192,
      "grad_norm": 0.7451657056808472,
      "learning_rate": 9.755282581475767e-07,
      "logits/chosen": -2.152054786682129,
      "logits/rejected": -0.8167506456375122,
      "logps/chosen": -241.89797973632812,
      "logps/rejected": -830.0802612304688,
      "loss": 0.1085,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -0.33556511998176575,
      "rewards/margins": 4.663994312286377,
      "rewards/rejected": -4.99955940246582,
      "step": 30
    },
    {
      "epoch": 0.256,
      "grad_norm": 0.9800211191177368,
      "learning_rate": 9.29224396800933e-07,
      "logits/chosen": -2.6119227409362793,
      "logits/rejected": -1.6330392360687256,
      "logps/chosen": -261.77215576171875,
      "logps/rejected": -951.4508666992188,
      "loss": 0.0372,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -2.5927298069000244,
      "rewards/margins": 14.331648826599121,
      "rewards/rejected": -16.924379348754883,
      "step": 40
    },
    {
      "epoch": 0.32,
      "grad_norm": 0.08055471628904343,
      "learning_rate": 8.613974319136957e-07,
      "logits/chosen": -2.8284103870391846,
      "logits/rejected": -2.016091823577881,
      "logps/chosen": -267.1216735839844,
      "logps/rejected": -1083.85693359375,
      "loss": 0.0246,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -4.838659286499023,
      "rewards/margins": 24.708829879760742,
      "rewards/rejected": -29.547487258911133,
      "step": 50
    },
    {
      "epoch": 0.384,
      "grad_norm": 0.07454250752925873,
      "learning_rate": 7.754484907260512e-07,
      "logits/chosen": -2.8392865657806396,
      "logits/rejected": -2.14508056640625,
      "logps/chosen": -296.3800964355469,
      "logps/rejected": -1137.64892578125,
      "loss": 0.02,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -6.760532379150391,
      "rewards/margins": 29.1066951751709,
      "rewards/rejected": -35.86723327636719,
      "step": 60
    },
    {
      "epoch": 0.448,
      "grad_norm": 0.10226113349199295,
      "learning_rate": 6.756874120406714e-07,
      "logits/chosen": -2.863455295562744,
      "logits/rejected": -2.1374523639678955,
      "logps/chosen": -282.3721008300781,
      "logps/rejected": -1143.27587890625,
      "loss": 0.0304,
      "rewards/accuracies": 0.9874999523162842,
      "rewards/chosen": -6.247722148895264,
      "rewards/margins": 30.036664962768555,
      "rewards/rejected": -36.28438949584961,
      "step": 70
    },
    {
      "epoch": 0.512,
      "grad_norm": 0.06721244752407074,
      "learning_rate": 5.671166329088277e-07,
      "logits/chosen": -2.756829023361206,
      "logits/rejected": -2.045252799987793,
      "logps/chosen": -291.5985412597656,
      "logps/rejected": -1122.9361572265625,
      "loss": 0.019,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -5.791654586791992,
      "rewards/margins": 27.7723331451416,
      "rewards/rejected": -33.563987731933594,
      "step": 80
    },
    {
      "epoch": 0.576,
      "grad_norm": 0.07295263558626175,
      "learning_rate": 4.5518034554828327e-07,
      "logits/chosen": -2.751217842102051,
      "logits/rejected": -2.025172233581543,
      "logps/chosen": -286.55694580078125,
      "logps/rejected": -1081.6651611328125,
      "loss": 0.0353,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -5.1540751457214355,
      "rewards/margins": 25.992502212524414,
      "rewards/rejected": -31.146577835083008,
      "step": 90
    },
    {
      "epoch": 0.64,
      "grad_norm": 0.08157353848218918,
      "learning_rate": 3.454915028125263e-07,
      "logits/chosen": -2.746480703353882,
      "logits/rejected": -1.9662470817565918,
      "logps/chosen": -271.390869140625,
      "logps/rejected": -1062.62109375,
      "loss": 0.0169,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.289627552032471,
      "rewards/margins": 24.77456283569336,
      "rewards/rejected": -29.06418800354004,
      "step": 100
    },
    {
      "epoch": 0.704,
      "grad_norm": 0.1165793165564537,
      "learning_rate": 2.4355036129704696e-07,
      "logits/chosen": -2.7295165061950684,
      "logits/rejected": -1.951841950416565,
      "logps/chosen": -265.2632141113281,
      "logps/rejected": -1057.536376953125,
      "loss": 0.0252,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -4.240163803100586,
      "rewards/margins": 23.72610092163086,
      "rewards/rejected": -27.966266632080078,
      "step": 110
    },
    {
      "epoch": 0.768,
      "grad_norm": 0.06965842097997665,
      "learning_rate": 1.5446867550656767e-07,
      "logits/chosen": -2.7134926319122314,
      "logits/rejected": -1.9151828289031982,
      "logps/chosen": -272.508056640625,
      "logps/rejected": -1058.3094482421875,
      "loss": 0.0668,
      "rewards/accuracies": 0.9906249642372131,
      "rewards/chosen": -4.029051303863525,
      "rewards/margins": 23.791399002075195,
      "rewards/rejected": -27.82044792175293,
      "step": 120
    },
    {
      "epoch": 0.832,
      "grad_norm": 0.06551803648471832,
      "learning_rate": 8.271337313934867e-08,
      "logits/chosen": -2.6546225547790527,
      "logits/rejected": -1.8665531873703003,
      "logps/chosen": -285.0191650390625,
      "logps/rejected": -1041.3951416015625,
      "loss": 0.036,
      "rewards/accuracies": 0.9812500476837158,
      "rewards/chosen": -3.9543297290802,
      "rewards/margins": 22.66595458984375,
      "rewards/rejected": -26.620285034179688,
      "step": 130
    },
    {
      "epoch": 0.896,
      "grad_norm": 0.07770426571369171,
      "learning_rate": 3.188256468013139e-08,
      "logits/chosen": -2.7073161602020264,
      "logits/rejected": -1.8695634603500366,
      "logps/chosen": -265.3280029296875,
      "logps/rejected": -1062.6976318359375,
      "loss": 0.0197,
      "rewards/accuracies": 0.996874988079071,
      "rewards/chosen": -3.531825304031372,
      "rewards/margins": 23.626256942749023,
      "rewards/rejected": -27.158079147338867,
      "step": 140
    },
    {
      "epoch": 0.96,
      "grad_norm": 0.06992675364017487,
      "learning_rate": 4.5251191160326495e-09,
      "logits/chosen": -2.6707656383514404,
      "logits/rejected": -1.817920446395874,
      "logps/chosen": -286.8282165527344,
      "logps/rejected": -1117.4290771484375,
      "loss": 0.0668,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -3.840498924255371,
      "rewards/margins": 23.849292755126953,
      "rewards/rejected": -27.68979263305664,
      "step": 150
    },
    {
      "epoch": 0.9984,
      "step": 156,
      "total_flos": 1.1115841451898962e+18,
      "train_loss": 0.10875998093531682,
      "train_runtime": 5515.1168,
      "train_samples_per_second": 0.907,
      "train_steps_per_second": 0.028
    }
  ],
  "logging_steps": 10,
  "max_steps": 156,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1115841451898962e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}