| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8368200836820083, |
| "eval_steps": 500, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008368200836820083, |
| "grad_norm": 9.75007616796795, |
| "learning_rate": 4.166666666666666e-08, |
| "logits/chosen": -2.761155366897583, |
| "logits/rejected": -2.7110397815704346, |
| "logps/chosen": -188.2120819091797, |
| "logps/rejected": -227.97329711914062, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.08368200836820083, |
| "grad_norm": 8.857631148542579, |
| "learning_rate": 4.1666666666666667e-07, |
| "logits/chosen": -2.752246856689453, |
| "logits/rejected": -2.7512197494506836, |
| "logps/chosen": -286.0818786621094, |
| "logps/rejected": -270.5452880859375, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.4791666567325592, |
| "rewards/chosen": 0.0016119987703859806, |
| "rewards/margins": 0.0004296954721212387, |
| "rewards/rejected": 0.001182303298264742, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.16736401673640167, |
| "grad_norm": 8.648350893662677, |
| "learning_rate": 4.931352528237397e-07, |
| "logits/chosen": -2.816281795501709, |
| "logits/rejected": -2.7909739017486572, |
| "logps/chosen": -281.4281311035156, |
| "logps/rejected": -259.23931884765625, |
| "loss": 0.6846, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": 0.02481575682759285, |
| "rewards/margins": 0.019779205322265625, |
| "rewards/rejected": 0.00503655057400465, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2510460251046025, |
| "grad_norm": 9.462527888383683, |
| "learning_rate": 4.658920803689553e-07, |
| "logits/chosen": -2.7952637672424316, |
| "logits/rejected": -2.7442078590393066, |
| "logps/chosen": -272.9776306152344, |
| "logps/rejected": -227.1405792236328, |
| "loss": 0.6647, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": 0.021379830315709114, |
| "rewards/margins": 0.0722198635339737, |
| "rewards/rejected": -0.05084002763032913, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 10.802670702282992, |
| "learning_rate": 4.201712553872657e-07, |
| "logits/chosen": -2.808018445968628, |
| "logits/rejected": -2.7901508808135986, |
| "logps/chosen": -269.8119812011719, |
| "logps/rejected": -276.64312744140625, |
| "loss": 0.6451, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.11026054620742798, |
| "rewards/margins": 0.09342513978481293, |
| "rewards/rejected": -0.2036857157945633, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.41841004184100417, |
| "grad_norm": 14.0383641925761, |
| "learning_rate": 3.598859066780754e-07, |
| "logits/chosen": -2.801910877227783, |
| "logits/rejected": -2.7805628776550293, |
| "logps/chosen": -308.90960693359375, |
| "logps/rejected": -296.39837646484375, |
| "loss": 0.6148, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.1705964356660843, |
| "rewards/margins": 0.27841097116470337, |
| "rewards/rejected": -0.44900742173194885, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.502092050209205, |
| "grad_norm": 17.441329707174045, |
| "learning_rate": 2.9019570347986706e-07, |
| "logits/chosen": -2.8074169158935547, |
| "logits/rejected": -2.7729382514953613, |
| "logps/chosen": -314.9671325683594, |
| "logps/rejected": -307.9980773925781, |
| "loss": 0.6091, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.3278293013572693, |
| "rewards/margins": 0.2875938415527344, |
| "rewards/rejected": -0.6154230833053589, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5857740585774058, |
| "grad_norm": 13.990328041846443, |
| "learning_rate": 2.1706525253979534e-07, |
| "logits/chosen": -2.808964252471924, |
| "logits/rejected": -2.7739078998565674, |
| "logps/chosen": -318.0167541503906, |
| "logps/rejected": -318.5818786621094, |
| "loss": 0.6029, |
| "rewards/accuracies": 0.668749988079071, |
| "rewards/chosen": -0.400399774312973, |
| "rewards/margins": 0.22247812151908875, |
| "rewards/rejected": -0.6228778958320618, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 17.12917679172734, |
| "learning_rate": 1.4675360263490295e-07, |
| "logits/chosen": -2.7654800415039062, |
| "logits/rejected": -2.7729649543762207, |
| "logps/chosen": -259.6241149902344, |
| "logps/rejected": -305.7326354980469, |
| "loss": 0.6021, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.29664868116378784, |
| "rewards/margins": 0.25704866647720337, |
| "rewards/rejected": -0.5536972880363464, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7531380753138075, |
| "grad_norm": 19.058433635316273, |
| "learning_rate": 8.527854855097224e-08, |
| "logits/chosen": -2.727175712585449, |
| "logits/rejected": -2.7393229007720947, |
| "logps/chosen": -297.5341491699219, |
| "logps/rejected": -314.9481201171875, |
| "loss": 0.5793, |
| "rewards/accuracies": 0.6812499761581421, |
| "rewards/chosen": -0.36374929547309875, |
| "rewards/margins": 0.3506723940372467, |
| "rewards/rejected": -0.7144217491149902, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8368200836820083, |
| "grad_norm": 15.477637607113444, |
| "learning_rate": 3.790158337517127e-08, |
| "logits/chosen": -2.7408394813537598, |
| "logits/rejected": -2.671611785888672, |
| "logps/chosen": -293.8873596191406, |
| "logps/rejected": -300.93572998046875, |
| "loss": 0.5918, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.41290682554244995, |
| "rewards/margins": 0.2459905445575714, |
| "rewards/rejected": -0.6588973999023438, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8368200836820083, |
| "step": 100, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 7.2976, |
| "train_samples_per_second": 2094.247, |
| "train_steps_per_second": 8.085 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 59, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|