| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.005925925925925926, | |
| "eval_steps": 500, | |
| "global_step": 10, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005925925925925926, | |
| "grad_norm": 0.20394721627235413, | |
| "learning_rate": 5e-07, | |
| "logits/chosen": -4.444676399230957, | |
| "logits/rejected": -4.0909342765808105, | |
| "logps/chosen": -3.4668707847595215, | |
| "logps/rejected": -3.5732126235961914, | |
| "loss": 0.7019, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.34668707847595215, | |
| "rewards/margins": 0.010634180158376694, | |
| "rewards/rejected": -0.35732126235961914, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0011851851851851852, | |
| "grad_norm": 0.11779391765594482, | |
| "learning_rate": 4.849231551964771e-07, | |
| "logits/chosen": -4.530362606048584, | |
| "logits/rejected": -3.981240749359131, | |
| "logps/chosen": -3.0032825469970703, | |
| "logps/rejected": -3.3395447731018066, | |
| "loss": 0.6892, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.30032825469970703, | |
| "rewards/margins": 0.03362620621919632, | |
| "rewards/rejected": -0.33395445346832275, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0017777777777777779, | |
| "grad_norm": 0.30934685468673706, | |
| "learning_rate": 4.415111107797445e-07, | |
| "logits/chosen": -4.551039695739746, | |
| "logits/rejected": -4.332643508911133, | |
| "logps/chosen": -3.143617868423462, | |
| "logps/rejected": -3.5061092376708984, | |
| "loss": 0.6877, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.3143618106842041, | |
| "rewards/margins": 0.03624912351369858, | |
| "rewards/rejected": -0.3506109118461609, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.0023703703703703703, | |
| "grad_norm": 0.38463467359542847, | |
| "learning_rate": 3.75e-07, | |
| "logits/chosen": -3.961301326751709, | |
| "logits/rejected": -4.457886219024658, | |
| "logps/chosen": -3.7372827529907227, | |
| "logps/rejected": -6.128005027770996, | |
| "loss": 0.6153, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.37372830510139465, | |
| "rewards/margins": 0.23907223343849182, | |
| "rewards/rejected": -0.6128004789352417, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.002962962962962963, | |
| "grad_norm": 0.28858768939971924, | |
| "learning_rate": 2.934120444167326e-07, | |
| "logits/chosen": -4.138806343078613, | |
| "logits/rejected": -4.704649925231934, | |
| "logps/chosen": -2.6420888900756836, | |
| "logps/rejected": -4.444168567657471, | |
| "loss": 0.6272, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2642088830471039, | |
| "rewards/margins": 0.18020795285701752, | |
| "rewards/rejected": -0.4444168508052826, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0035555555555555557, | |
| "grad_norm": 0.18824324011802673, | |
| "learning_rate": 2.065879555832674e-07, | |
| "logits/chosen": -5.056676864624023, | |
| "logits/rejected": -5.151688098907471, | |
| "logps/chosen": -3.0351624488830566, | |
| "logps/rejected": -3.163884162902832, | |
| "loss": 0.6995, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.3035162687301636, | |
| "rewards/margins": 0.012872166931629181, | |
| "rewards/rejected": -0.31638842821121216, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.004148148148148148, | |
| "grad_norm": 0.10379564017057419, | |
| "learning_rate": 1.2500000000000005e-07, | |
| "logits/chosen": -4.240752696990967, | |
| "logits/rejected": -3.6786282062530518, | |
| "logps/chosen": -2.834850788116455, | |
| "logps/rejected": -3.4888226985931396, | |
| "loss": 0.6739, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.2834850549697876, | |
| "rewards/margins": 0.0653972178697586, | |
| "rewards/rejected": -0.3488822877407074, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.004740740740740741, | |
| "grad_norm": 0.1659402847290039, | |
| "learning_rate": 5.848888922025552e-08, | |
| "logits/chosen": -4.413921356201172, | |
| "logits/rejected": -4.417206764221191, | |
| "logps/chosen": -2.845391273498535, | |
| "logps/rejected": -3.0666072368621826, | |
| "loss": 0.6947, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.284539133310318, | |
| "rewards/margins": 0.022121611982584, | |
| "rewards/rejected": -0.3066607415676117, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.005333333333333333, | |
| "grad_norm": 0.8284716010093689, | |
| "learning_rate": 1.507684480352292e-08, | |
| "logits/chosen": -4.9146857261657715, | |
| "logits/rejected": -4.60509729385376, | |
| "logps/chosen": -5.166627883911133, | |
| "logps/rejected": -5.358448028564453, | |
| "loss": 0.7251, | |
| "rewards/accuracies": 0.25, | |
| "rewards/chosen": -0.5166628360748291, | |
| "rewards/margins": 0.019182000309228897, | |
| "rewards/rejected": -0.5358448028564453, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.005925925925925926, | |
| "grad_norm": 0.24434176087379456, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -3.8119335174560547, | |
| "logits/rejected": -3.695971727371216, | |
| "logps/chosen": -3.6296451091766357, | |
| "logps/rejected": -3.969270706176758, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.36296454071998596, | |
| "rewards/margins": 0.03396253287792206, | |
| "rewards/rejected": -0.3969270884990692, | |
| "step": 10 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 10, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |