{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.20276497695852536, "eval_steps": 5, "global_step": 44, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02304147465437788, "eval_logits/chosen": -1.1632846593856812, "eval_logits/rejected": -0.8827418088912964, "eval_logps/chosen": -731.2137451171875, "eval_logps/rejected": -465.1360778808594, "eval_loss": 0.7025490403175354, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": 0.08797025680541992, "eval_rewards/margins": 0.07403016090393066, "eval_rewards/rejected": 0.013940095901489258, "eval_runtime": 2.5482, "eval_samples_per_second": 9.811, "eval_steps_per_second": 1.57, "step": 5 }, { "epoch": 0.04608294930875576, "grad_norm": 86.91683959960938, "learning_rate": 4.799948609147061e-07, "logits/chosen": -1.066173791885376, "logits/rejected": -0.9449604749679565, "logps/chosen": -674.0364379882812, "logps/rejected": -356.2140808105469, "loss": 0.777, "rewards/accuracies": 0.30000001192092896, "rewards/chosen": -0.1258804351091385, "rewards/margins": -0.14030227065086365, "rewards/rejected": 0.014421844854950905, "step": 10 }, { "epoch": 0.04608294930875576, "eval_logits/chosen": -1.1648622751235962, "eval_logits/rejected": -0.8837531208992004, "eval_logps/chosen": -731.4483642578125, "eval_logps/rejected": -465.3337707519531, "eval_loss": 0.7411171197891235, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": -0.029358863830566406, "eval_rewards/margins": 0.055533647537231445, "eval_rewards/rejected": -0.08489251136779785, "eval_runtime": 2.2132, "eval_samples_per_second": 11.296, "eval_steps_per_second": 1.807, "step": 10 }, { "epoch": 0.06912442396313365, "eval_logits/chosen": -1.1638308763504028, "eval_logits/rejected": -0.8826640844345093, "eval_logps/chosen": -731.311767578125, "eval_logps/rejected": -465.1154479980469, "eval_loss": 0.7261512279510498, "eval_rewards/accuracies": 0.625, "eval_rewards/chosen": 0.03898191452026367, "eval_rewards/margins": 0.01471400260925293, "eval_rewards/rejected": 0.024267911911010742, "eval_runtime": 2.2289, "eval_samples_per_second": 11.216, "eval_steps_per_second": 1.795, "step": 15 }, { "epoch": 0.09216589861751152, "grad_norm": 133.0209197998047, "learning_rate": 3.3865122176063385e-07, "logits/chosen": -1.1327978372573853, "logits/rejected": -0.9894822239875793, "logps/chosen": -718.1174926757812, "logps/rejected": -418.946533203125, "loss": 0.685, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.0425872802734375, "rewards/margins": 0.05570220947265625, "rewards/rejected": -0.01311492919921875, "step": 20 }, { "epoch": 0.09216589861751152, "eval_logits/chosen": -1.165111780166626, "eval_logits/rejected": -0.8841784000396729, "eval_logps/chosen": -731.49267578125, "eval_logps/rejected": -465.2555847167969, "eval_loss": 0.7530465722084045, "eval_rewards/accuracies": 0.53125, "eval_rewards/chosen": -0.0514984130859375, "eval_rewards/margins": -0.005685091018676758, "eval_rewards/rejected": -0.04581332206726074, "eval_runtime": 2.2284, "eval_samples_per_second": 11.219, "eval_steps_per_second": 1.795, "step": 20 }, { "epoch": 0.1152073732718894, "eval_logits/chosen": -1.1639286279678345, "eval_logits/rejected": -0.8834071159362793, "eval_logps/chosen": -731.60693359375, "eval_logps/rejected": -465.266845703125, "eval_loss": 0.7450304627418518, "eval_rewards/accuracies": 0.3125, "eval_rewards/chosen": -0.10858917236328125, "eval_rewards/margins": -0.05713796615600586, "eval_rewards/rejected": -0.05145120620727539, "eval_runtime": 2.2254, "eval_samples_per_second": 11.234, "eval_steps_per_second": 1.797, "step": 25 }, { "epoch": 0.1382488479262673, "grad_norm": 112.3116683959961, "learning_rate": 1.428268596492364e-07, "logits/chosen": -1.0476138591766357, "logits/rejected": -0.9094411730766296, "logps/chosen": -582.4729614257812, "logps/rejected": -353.1932678222656, "loss": 0.7665, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.1044158935546875, "rewards/margins": -0.09558334201574326, "rewards/rejected": -0.008832549676299095, "step": 30 }, { "epoch": 0.1382488479262673, "eval_logits/chosen": -1.1638270616531372, "eval_logits/rejected": -0.8826746344566345, "eval_logps/chosen": -731.166259765625, "eval_logps/rejected": -465.258544921875, "eval_loss": 0.6754930019378662, "eval_rewards/accuracies": 0.65625, "eval_rewards/chosen": 0.11172008514404297, "eval_rewards/margins": 0.1590101718902588, "eval_rewards/rejected": -0.04729008674621582, "eval_runtime": 2.2212, "eval_samples_per_second": 11.255, "eval_steps_per_second": 1.801, "step": 30 }, { "epoch": 0.16129032258064516, "eval_logits/chosen": -1.1637563705444336, "eval_logits/rejected": -0.882408857345581, "eval_logps/chosen": -731.4846801757812, "eval_logps/rejected": -465.2984313964844, "eval_loss": 0.718708336353302, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.04746055603027344, "eval_rewards/margins": 0.01976180076599121, "eval_rewards/rejected": -0.06722235679626465, "eval_runtime": 2.2257, "eval_samples_per_second": 11.232, "eval_steps_per_second": 1.797, "step": 35 }, { "epoch": 0.18433179723502305, "grad_norm": 121.56043243408203, "learning_rate": 1.2865889513213628e-08, "logits/chosen": -1.1619962453842163, "logits/rejected": -0.9679350852966309, "logps/chosen": -642.6929931640625, "logps/rejected": -397.54107666015625, "loss": 0.7523, "rewards/accuracies": 0.5, "rewards/chosen": -0.057281494140625, "rewards/margins": -0.09049377590417862, "rewards/rejected": 0.03321228176355362, "step": 40 }, { "epoch": 0.18433179723502305, "eval_logits/chosen": -1.164229393005371, "eval_logits/rejected": -0.8833534717559814, "eval_logps/chosen": -731.2298583984375, "eval_logps/rejected": -465.13787841796875, "eval_loss": 0.6944708228111267, "eval_rewards/accuracies": 0.59375, "eval_rewards/chosen": 0.07993173599243164, "eval_rewards/margins": 0.06688284873962402, "eval_rewards/rejected": 0.013048887252807617, "eval_runtime": 2.225, "eval_samples_per_second": 11.236, "eval_steps_per_second": 1.798, "step": 40 } ], "logging_steps": 10, "max_steps": 44, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }