{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.20276497695852536,
  "eval_steps": 5,
  "global_step": 44,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02304147465437788,
      "eval_logits/chosen": -1.1632846593856812,
      "eval_logits/rejected": -0.8827418088912964,
      "eval_logps/chosen": -731.2137451171875,
      "eval_logps/rejected": -465.1360778808594,
      "eval_loss": 0.7025490403175354,
      "eval_rewards/accuracies": 0.625,
      "eval_rewards/chosen": 0.08797025680541992,
      "eval_rewards/margins": 0.07403016090393066,
      "eval_rewards/rejected": 0.013940095901489258,
      "eval_runtime": 2.5482,
      "eval_samples_per_second": 9.811,
      "eval_steps_per_second": 1.57,
      "step": 5
    },
    {
      "epoch": 0.04608294930875576,
      "grad_norm": 86.91683959960938,
      "learning_rate": 4.799948609147061e-07,
      "logits/chosen": -1.066173791885376,
      "logits/rejected": -0.9449604749679565,
      "logps/chosen": -674.0364379882812,
      "logps/rejected": -356.2140808105469,
      "loss": 0.777,
      "rewards/accuracies": 0.30000001192092896,
      "rewards/chosen": -0.1258804351091385,
      "rewards/margins": -0.14030227065086365,
      "rewards/rejected": 0.014421844854950905,
      "step": 10
    },
    {
      "epoch": 0.04608294930875576,
      "eval_logits/chosen": -1.1648622751235962,
      "eval_logits/rejected": -0.8837531208992004,
      "eval_logps/chosen": -731.4483642578125,
      "eval_logps/rejected": -465.3337707519531,
      "eval_loss": 0.7411171197891235,
      "eval_rewards/accuracies": 0.625,
      "eval_rewards/chosen": -0.029358863830566406,
      "eval_rewards/margins": 0.055533647537231445,
      "eval_rewards/rejected": -0.08489251136779785,
      "eval_runtime": 2.2132,
      "eval_samples_per_second": 11.296,
      "eval_steps_per_second": 1.807,
      "step": 10
    },
    {
      "epoch": 0.06912442396313365,
      "eval_logits/chosen": -1.1638308763504028,
      "eval_logits/rejected": -0.8826640844345093,
      "eval_logps/chosen": -731.311767578125,
      "eval_logps/rejected": -465.1154479980469,
      "eval_loss": 0.7261512279510498,
      "eval_rewards/accuracies": 0.625,
      "eval_rewards/chosen": 0.03898191452026367,
      "eval_rewards/margins": 0.01471400260925293,
      "eval_rewards/rejected": 0.024267911911010742,
      "eval_runtime": 2.2289,
      "eval_samples_per_second": 11.216,
      "eval_steps_per_second": 1.795,
      "step": 15
    },
    {
      "epoch": 0.09216589861751152,
      "grad_norm": 133.0209197998047,
      "learning_rate": 3.3865122176063385e-07,
      "logits/chosen": -1.1327978372573853,
      "logits/rejected": -0.9894822239875793,
      "logps/chosen": -718.1174926757812,
      "logps/rejected": -418.946533203125,
      "loss": 0.685,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": 0.0425872802734375,
      "rewards/margins": 0.05570220947265625,
      "rewards/rejected": -0.01311492919921875,
      "step": 20
    },
    {
      "epoch": 0.09216589861751152,
      "eval_logits/chosen": -1.165111780166626,
      "eval_logits/rejected": -0.8841784000396729,
      "eval_logps/chosen": -731.49267578125,
      "eval_logps/rejected": -465.2555847167969,
      "eval_loss": 0.7530465722084045,
      "eval_rewards/accuracies": 0.53125,
      "eval_rewards/chosen": -0.0514984130859375,
      "eval_rewards/margins": -0.005685091018676758,
      "eval_rewards/rejected": -0.04581332206726074,
      "eval_runtime": 2.2284,
      "eval_samples_per_second": 11.219,
      "eval_steps_per_second": 1.795,
      "step": 20
    },
    {
      "epoch": 0.1152073732718894,
      "eval_logits/chosen": -1.1639286279678345,
      "eval_logits/rejected": -0.8834071159362793,
      "eval_logps/chosen": -731.60693359375,
      "eval_logps/rejected": -465.266845703125,
      "eval_loss": 0.7450304627418518,
      "eval_rewards/accuracies": 0.3125,
      "eval_rewards/chosen": -0.10858917236328125,
      "eval_rewards/margins": -0.05713796615600586,
      "eval_rewards/rejected": -0.05145120620727539,
      "eval_runtime": 2.2254,
      "eval_samples_per_second": 11.234,
      "eval_steps_per_second": 1.797,
      "step": 25
    },
    {
      "epoch": 0.1382488479262673,
      "grad_norm": 112.3116683959961,
      "learning_rate": 1.428268596492364e-07,
      "logits/chosen": -1.0476138591766357,
      "logits/rejected": -0.9094411730766296,
      "logps/chosen": -582.4729614257812,
      "logps/rejected": -353.1932678222656,
      "loss": 0.7665,
      "rewards/accuracies": 0.4000000059604645,
      "rewards/chosen": -0.1044158935546875,
      "rewards/margins": -0.09558334201574326,
      "rewards/rejected": -0.008832549676299095,
      "step": 30
    },
    {
      "epoch": 0.1382488479262673,
      "eval_logits/chosen": -1.1638270616531372,
      "eval_logits/rejected": -0.8826746344566345,
      "eval_logps/chosen": -731.166259765625,
      "eval_logps/rejected": -465.258544921875,
      "eval_loss": 0.6754930019378662,
      "eval_rewards/accuracies": 0.65625,
      "eval_rewards/chosen": 0.11172008514404297,
      "eval_rewards/margins": 0.1590101718902588,
      "eval_rewards/rejected": -0.04729008674621582,
      "eval_runtime": 2.2212,
      "eval_samples_per_second": 11.255,
      "eval_steps_per_second": 1.801,
      "step": 30
    },
    {
      "epoch": 0.16129032258064516,
      "eval_logits/chosen": -1.1637563705444336,
      "eval_logits/rejected": -0.882408857345581,
      "eval_logps/chosen": -731.4846801757812,
      "eval_logps/rejected": -465.2984313964844,
      "eval_loss": 0.718708336353302,
      "eval_rewards/accuracies": 0.6875,
      "eval_rewards/chosen": -0.04746055603027344,
      "eval_rewards/margins": 0.01976180076599121,
      "eval_rewards/rejected": -0.06722235679626465,
      "eval_runtime": 2.2257,
      "eval_samples_per_second": 11.232,
      "eval_steps_per_second": 1.797,
      "step": 35
    },
    {
      "epoch": 0.18433179723502305,
      "grad_norm": 121.56043243408203,
      "learning_rate": 1.2865889513213628e-08,
      "logits/chosen": -1.1619962453842163,
      "logits/rejected": -0.9679350852966309,
      "logps/chosen": -642.6929931640625,
      "logps/rejected": -397.54107666015625,
      "loss": 0.7523,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.057281494140625,
      "rewards/margins": -0.09049377590417862,
      "rewards/rejected": 0.03321228176355362,
      "step": 40
    },
    {
      "epoch": 0.18433179723502305,
      "eval_logits/chosen": -1.164229393005371,
      "eval_logits/rejected": -0.8833534717559814,
      "eval_logps/chosen": -731.2298583984375,
      "eval_logps/rejected": -465.13787841796875,
      "eval_loss": 0.6944708228111267,
      "eval_rewards/accuracies": 0.59375,
      "eval_rewards/chosen": 0.07993173599243164,
      "eval_rewards/margins": 0.06688284873962402,
      "eval_rewards/rejected": 0.013048887252807617,
      "eval_runtime": 2.225,
      "eval_samples_per_second": 11.236,
      "eval_steps_per_second": 1.798,
      "step": 40
    }
  ],
  "logging_steps": 10,
  "max_steps": 44,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}