{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998324958123953,
  "eval_steps": 100,
  "global_step": 149,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.3333333333333335e-07,
      "logits/chosen": -2.55656099319458,
      "logits/rejected": -2.55704402923584,
      "logps/chosen": -162.36532592773438,
      "logps/rejected": -172.43312072753906,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.07,
      "learning_rate": 3.3333333333333333e-06,
      "logits/chosen": -2.666642189025879,
      "logits/rejected": -2.6329586505889893,
      "logps/chosen": -185.80641174316406,
      "logps/rejected": -184.18959045410156,
      "loss": 0.6928,
      "rewards/accuracies": 0.4548611044883728,
      "rewards/chosen": -0.001971459249034524,
      "rewards/margins": 0.0016002749325707555,
      "rewards/rejected": -0.003571733832359314,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.982842942906386e-06,
      "logits/chosen": -2.721466541290283,
      "logits/rejected": -2.67402982711792,
      "logps/chosen": -201.33470153808594,
      "logps/rejected": -194.63198852539062,
      "loss": 0.6949,
      "rewards/accuracies": 0.534375011920929,
      "rewards/chosen": -0.10967914760112762,
      "rewards/margins": 0.0006104880012571812,
      "rewards/rejected": -0.11028961837291718,
      "step": 20
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.846996204000967e-06,
      "logits/chosen": -2.625549077987671,
      "logits/rejected": -2.607099771499634,
      "logps/chosen": -197.00973510742188,
      "logps/rejected": -195.684326171875,
      "loss": 0.6939,
      "rewards/accuracies": 0.5093749761581421,
      "rewards/chosen": -0.08795476704835892,
      "rewards/margins": -0.002985857194289565,
      "rewards/rejected": -0.08496890217065811,
      "step": 30
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.582735470385229e-06,
      "logits/chosen": -2.633084535598755,
      "logits/rejected": -2.634359836578369,
      "logps/chosen": -185.71023559570312,
      "logps/rejected": -185.03509521484375,
      "loss": 0.6921,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.04532230272889137,
      "rewards/margins": 0.0011206632480025291,
      "rewards/rejected": -0.046442966908216476,
      "step": 40
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.204519553876095e-06,
      "logits/chosen": -2.6109485626220703,
      "logits/rejected": -2.6062893867492676,
      "logps/chosen": -171.55734252929688,
      "logps/rejected": -173.1212158203125,
      "loss": 0.6912,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.014199512079358101,
      "rewards/margins": 0.0028473488055169582,
      "rewards/rejected": -0.017046859487891197,
      "step": 50
    },
    {
      "epoch": 0.4,
      "learning_rate": 3.7330422317447686e-06,
      "logits/chosen": -2.6457674503326416,
      "logits/rejected": -2.6292178630828857,
      "logps/chosen": -188.72109985351562,
      "logps/rejected": -184.30081176757812,
      "loss": 0.6905,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -0.00934157706797123,
      "rewards/margins": 0.002996337367221713,
      "rewards/rejected": -0.012337915599346161,
      "step": 60
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.1941000034687516e-06,
      "logits/chosen": -2.6249117851257324,
      "logits/rejected": -2.611786365509033,
      "logps/chosen": -170.21836853027344,
      "logps/rejected": -177.2921142578125,
      "loss": 0.6905,
      "rewards/accuracies": 0.534375011920929,
      "rewards/chosen": -0.03585924953222275,
      "rewards/margins": 0.0028343182057142258,
      "rewards/rejected": -0.03869356960058212,
      "step": 70
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.6171806561748503e-06,
      "logits/chosen": -2.5946967601776123,
      "logits/rejected": -2.571646213531494,
      "logps/chosen": -184.07911682128906,
      "logps/rejected": -189.4521942138672,
      "loss": 0.6896,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -0.019358564168214798,
      "rewards/margins": 0.011903460137546062,
      "rewards/rejected": -0.031262025237083435,
      "step": 80
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.0338498642707977e-06,
      "logits/chosen": -2.629333972930908,
      "logits/rejected": -2.6070916652679443,
      "logps/chosen": -183.12803649902344,
      "logps/rejected": -187.22447204589844,
      "loss": 0.6902,
      "rewards/accuracies": 0.565625011920929,
      "rewards/chosen": -0.0804746001958847,
      "rewards/margins": 0.01135367900133133,
      "rewards/rejected": -0.09182827174663544,
      "step": 90
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.4760240991587338e-06,
      "logits/chosen": -2.5863940715789795,
      "logits/rejected": -2.5969200134277344,
      "logps/chosen": -172.47142028808594,
      "logps/rejected": -180.51638793945312,
      "loss": 0.6889,
      "rewards/accuracies": 0.5531250238418579,
      "rewards/chosen": -0.05926589295268059,
      "rewards/margins": 0.011102231219410896,
      "rewards/rejected": -0.07036812603473663,
      "step": 100
    },
    {
      "epoch": 0.67,
      "eval_logits/chosen": -2.5383808612823486,
      "eval_logits/rejected": -2.4414656162261963,
      "eval_logps/chosen": -307.9212646484375,
      "eval_logps/rejected": -301.2935485839844,
      "eval_loss": 0.6852481365203857,
      "eval_rewards/accuracies": 0.5659999847412109,
      "eval_rewards/chosen": -0.0951852798461914,
      "eval_rewards/margins": 0.019401030614972115,
      "eval_rewards/rejected": -0.11458631604909897,
      "eval_runtime": 382.181,
      "eval_samples_per_second": 5.233,
      "eval_steps_per_second": 0.654,
      "step": 100
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.742243453755202e-07,
      "logits/chosen": -2.612968683242798,
      "logits/rejected": -2.6128811836242676,
      "logps/chosen": -176.701416015625,
      "logps/rejected": -186.86849975585938,
      "loss": 0.6882,
      "rewards/accuracies": 0.5406249761581421,
      "rewards/chosen": -0.027405280619859695,
      "rewards/margins": 0.016830626875162125,
      "rewards/rejected": -0.04423590749502182,
      "step": 110
    },
    {
      "epoch": 0.8,
      "learning_rate": 5.559061696656199e-07,
      "logits/chosen": -2.585603713989258,
      "logits/rejected": -2.5767314434051514,
      "logps/chosen": -184.33139038085938,
      "logps/rejected": -189.44131469726562,
      "loss": 0.686,
      "rewards/accuracies": 0.6031249761581421,
      "rewards/chosen": -0.03763353452086449,
      "rewards/margins": 0.025313779711723328,
      "rewards/rejected": -0.06294731795787811,
      "step": 120
    },
    {
      "epoch": 0.87,
      "learning_rate": 2.4395751190352924e-07,
      "logits/chosen": -2.5703272819519043,
      "logits/rejected": -2.5677199363708496,
      "logps/chosen": -184.06341552734375,
      "logps/rejected": -188.58779907226562,
      "loss": 0.6884,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -0.06450501084327698,
      "rewards/margins": 0.007478479295969009,
      "rewards/rejected": -0.07198350131511688,
      "step": 130
    },
    {
      "epoch": 0.94,
      "learning_rate": 5.544639001763719e-08,
      "logits/chosen": -2.6024391651153564,
      "logits/rejected": -2.5870203971862793,
      "logps/chosen": -199.04031372070312,
      "logps/rejected": -197.60330200195312,
      "loss": 0.686,
      "rewards/accuracies": 0.565625011920929,
      "rewards/chosen": -0.06172681599855423,
      "rewards/margins": 0.017358267679810524,
      "rewards/rejected": -0.0790850818157196,
      "step": 140
    },
    {
      "epoch": 1.0,
      "step": 149,
      "total_flos": 0.0,
      "train_loss": 0.6901595840518107,
      "train_runtime": 6945.5042,
      "train_samples_per_second": 2.75,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 10,
  "max_steps": 149,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}