| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 636, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 7.8125e-06, | |
| "logps/chosen": -122.16297149658203, | |
| "logps/rejected": -71.43323516845703, | |
| "loss": 0.4952, | |
| "losses/dpo": 0.4956728219985962, | |
| "losses/sft": 0.7316558957099915, | |
| "losses/total": 0.4956728219985962, | |
| "ref_logps/chosen": -127.74378204345703, | |
| "ref_logps/rejected": -70.59587860107422, | |
| "rewards/accuracies": 0.8070000410079956, | |
| "rewards/chosen": 0.5580801367759705, | |
| "rewards/margins": 0.6418154239654541, | |
| "rewards/rejected": -0.08373536914587021, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.5625e-05, | |
| "logps/chosen": -94.26205444335938, | |
| "logps/rejected": -85.60575866699219, | |
| "loss": 0.0691, | |
| "losses/dpo": 0.07388682663440704, | |
| "losses/sft": 0.5650071501731873, | |
| "losses/total": 0.07388682663440704, | |
| "ref_logps/chosen": -128.54661560058594, | |
| "ref_logps/rejected": -72.49893951416016, | |
| "rewards/accuracies": 0.9929999709129333, | |
| "rewards/chosen": 3.428455114364624, | |
| "rewards/margins": 4.739137649536133, | |
| "rewards/rejected": -1.310682773590088, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.9615384615384617e-05, | |
| "logps/chosen": -85.07345581054688, | |
| "logps/rejected": -101.97691345214844, | |
| "loss": 0.0179, | |
| "losses/dpo": 0.014726839028298855, | |
| "losses/sft": 0.5030468106269836, | |
| "losses/total": 0.014726839028298855, | |
| "ref_logps/chosen": -129.9876708984375, | |
| "ref_logps/rejected": -72.3249282836914, | |
| "rewards/accuracies": 0.9989999532699585, | |
| "rewards/chosen": 4.491419792175293, | |
| "rewards/margins": 7.45661735534668, | |
| "rewards/rejected": -2.9651970863342285, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.8741258741258744e-05, | |
| "logps/chosen": -85.64691162109375, | |
| "logps/rejected": -110.90087890625, | |
| "loss": 0.0096, | |
| "losses/dpo": 0.012412017211318016, | |
| "losses/sft": 0.5199429988861084, | |
| "losses/total": 0.012412017211318016, | |
| "ref_logps/chosen": -130.2884979248047, | |
| "ref_logps/rejected": -71.44290924072266, | |
| "rewards/accuracies": 0.9984999299049377, | |
| "rewards/chosen": 4.464157581329346, | |
| "rewards/margins": 8.409955024719238, | |
| "rewards/rejected": -3.9457967281341553, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.7867132867132868e-05, | |
| "logps/chosen": -82.34768676757812, | |
| "logps/rejected": -116.94005584716797, | |
| "loss": 0.0061, | |
| "losses/dpo": 0.008614934980869293, | |
| "losses/sft": 0.49562689661979675, | |
| "losses/total": 0.008614934980869293, | |
| "ref_logps/chosen": -128.71200561523438, | |
| "ref_logps/rejected": -71.86701202392578, | |
| "rewards/accuracies": 0.9994999766349792, | |
| "rewards/chosen": 4.636431694030762, | |
| "rewards/margins": 9.143735885620117, | |
| "rewards/rejected": -4.507305145263672, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.6993006993006995e-05, | |
| "logps/chosen": -85.28910064697266, | |
| "logps/rejected": -123.17980194091797, | |
| "loss": 0.0053, | |
| "losses/dpo": 0.004700234159827232, | |
| "losses/sft": 0.5220319032669067, | |
| "losses/total": 0.004700234159827232, | |
| "ref_logps/chosen": -129.39625549316406, | |
| "ref_logps/rejected": -70.16360473632812, | |
| "rewards/accuracies": 0.9994999766349792, | |
| "rewards/chosen": 4.410714149475098, | |
| "rewards/margins": 9.712334632873535, | |
| "rewards/rejected": -5.301620006561279, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.611888111888112e-05, | |
| "logps/chosen": -84.59432983398438, | |
| "logps/rejected": -131.1455535888672, | |
| "loss": 0.0051, | |
| "losses/dpo": 0.003301014890894294, | |
| "losses/sft": 0.5115602016448975, | |
| "losses/total": 0.003301014890894294, | |
| "ref_logps/chosen": -127.61747741699219, | |
| "ref_logps/rejected": -71.97355651855469, | |
| "rewards/accuracies": 0.9994999766349792, | |
| "rewards/chosen": 4.302317142486572, | |
| "rewards/margins": 10.219517707824707, | |
| "rewards/rejected": -5.917200088500977, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.5244755244755244e-05, | |
| "logps/chosen": -88.2319564819336, | |
| "logps/rejected": -141.83016967773438, | |
| "loss": 0.0028, | |
| "losses/dpo": 0.002293643541634083, | |
| "losses/sft": 0.5383260846138, | |
| "losses/total": 0.002293643541634083, | |
| "ref_logps/chosen": -129.1661376953125, | |
| "ref_logps/rejected": -71.8288803100586, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.093417167663574, | |
| "rewards/margins": 11.093545913696289, | |
| "rewards/rejected": -7.000128746032715, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.4370629370629371e-05, | |
| "logps/chosen": -89.62419891357422, | |
| "logps/rejected": -153.9451904296875, | |
| "loss": 0.0033, | |
| "losses/dpo": 0.0030320805963128805, | |
| "losses/sft": 0.532666027545929, | |
| "losses/total": 0.0030320805963128805, | |
| "ref_logps/chosen": -128.41148376464844, | |
| "ref_logps/rejected": -71.97950744628906, | |
| "rewards/accuracies": 0.9989999532699585, | |
| "rewards/chosen": 3.878729820251465, | |
| "rewards/margins": 12.075300216674805, | |
| "rewards/rejected": -8.196569442749023, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 1.3496503496503497e-05, | |
| "logps/chosen": -86.68380737304688, | |
| "logps/rejected": -156.0954132080078, | |
| "loss": 0.0013, | |
| "losses/dpo": 0.0008313562138937414, | |
| "losses/sft": 0.518293559551239, | |
| "losses/total": 0.0008313562138937414, | |
| "ref_logps/chosen": -128.29469299316406, | |
| "ref_logps/rejected": -71.77529907226562, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.161087989807129, | |
| "rewards/margins": 12.593099594116211, | |
| "rewards/rejected": -8.432010650634766, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.2622377622377624e-05, | |
| "logps/chosen": -88.83470916748047, | |
| "logps/rejected": -160.05836486816406, | |
| "loss": 0.0015, | |
| "losses/dpo": 0.0008700879407115281, | |
| "losses/sft": 0.5323516726493835, | |
| "losses/total": 0.0008700879407115281, | |
| "ref_logps/chosen": -128.95960998535156, | |
| "ref_logps/rejected": -70.340576171875, | |
| "rewards/accuracies": 0.9994999766349792, | |
| "rewards/chosen": 4.012491703033447, | |
| "rewards/margins": 12.984270095825195, | |
| "rewards/rejected": -8.971778869628906, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 1.1748251748251748e-05, | |
| "logps/chosen": -90.86172485351562, | |
| "logps/rejected": -162.376953125, | |
| "loss": 0.0011, | |
| "losses/dpo": 0.00106943363789469, | |
| "losses/sft": 0.5586134195327759, | |
| "losses/total": 0.00106943363789469, | |
| "ref_logps/chosen": -129.39662170410156, | |
| "ref_logps/rejected": -71.82042694091797, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.853489398956299, | |
| "rewards/margins": 12.909143447875977, | |
| "rewards/rejected": -9.055652618408203, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 1.0874125874125875e-05, | |
| "logps/chosen": -93.8604507446289, | |
| "logps/rejected": -171.50653076171875, | |
| "loss": 0.0013, | |
| "losses/dpo": 0.0009554739226587117, | |
| "losses/sft": 0.5667473077774048, | |
| "losses/total": 0.0009554739226587117, | |
| "ref_logps/chosen": -128.62173461914062, | |
| "ref_logps/rejected": -72.07390594482422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.476128339767456, | |
| "rewards/margins": 13.419390678405762, | |
| "rewards/rejected": -9.943263053894043, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1e-05, | |
| "logps/chosen": -92.96914672851562, | |
| "logps/rejected": -173.7914276123047, | |
| "loss": 0.0009, | |
| "losses/dpo": 0.00044292627717368305, | |
| "losses/sft": 0.561470627784729, | |
| "losses/total": 0.00044292627717368305, | |
| "ref_logps/chosen": -128.7430877685547, | |
| "ref_logps/rejected": -72.57361602783203, | |
| "rewards/accuracies": 0.9994999766349792, | |
| "rewards/chosen": 3.577392578125, | |
| "rewards/margins": 13.699174880981445, | |
| "rewards/rejected": -10.121781349182129, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 9.125874125874126e-06, | |
| "logps/chosen": -94.47010040283203, | |
| "logps/rejected": -175.3832550048828, | |
| "loss": 0.0006, | |
| "losses/dpo": 0.00038047495763748884, | |
| "losses/sft": 0.567641019821167, | |
| "losses/total": 0.00038047495763748884, | |
| "ref_logps/chosen": -128.09613037109375, | |
| "ref_logps/rejected": -71.36651611328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3626015186309814, | |
| "rewards/margins": 13.764276504516602, | |
| "rewards/rejected": -10.401673316955566, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 8.251748251748254e-06, | |
| "logps/chosen": -96.90827941894531, | |
| "logps/rejected": -179.17970275878906, | |
| "loss": 0.0004, | |
| "losses/dpo": 0.0003745325666386634, | |
| "losses/sft": 0.5794407725334167, | |
| "losses/total": 0.0003745325666386634, | |
| "ref_logps/chosen": -129.79989624023438, | |
| "ref_logps/rejected": -71.4466323852539, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2891619205474854, | |
| "rewards/margins": 14.062468528747559, | |
| "rewards/rejected": -10.773306846618652, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 7.377622377622379e-06, | |
| "logps/chosen": -96.15299987792969, | |
| "logps/rejected": -181.71612548828125, | |
| "loss": 0.001, | |
| "losses/dpo": 0.0020445636473596096, | |
| "losses/sft": 0.5634098052978516, | |
| "losses/total": 0.0020445636473596096, | |
| "ref_logps/chosen": -130.4124298095703, | |
| "ref_logps/rejected": -71.5147933959961, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4259424209594727, | |
| "rewards/margins": 14.446078300476074, | |
| "rewards/rejected": -11.020133972167969, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 6.503496503496504e-06, | |
| "logps/chosen": -93.45982360839844, | |
| "logps/rejected": -177.76284790039062, | |
| "loss": 0.0003, | |
| "losses/dpo": 0.0001847467792686075, | |
| "losses/sft": 0.5676508545875549, | |
| "losses/total": 0.0001847467792686075, | |
| "ref_logps/chosen": -128.48521423339844, | |
| "ref_logps/rejected": -71.86593627929688, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5025393962860107, | |
| "rewards/margins": 14.092233657836914, | |
| "rewards/rejected": -10.589694023132324, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 5.629370629370629e-06, | |
| "logps/chosen": -95.26840209960938, | |
| "logps/rejected": -181.77322387695312, | |
| "loss": 0.0003, | |
| "losses/dpo": 0.00029167634784244, | |
| "losses/sft": 0.5723408460617065, | |
| "losses/total": 0.00029167634784244, | |
| "ref_logps/chosen": -129.8194580078125, | |
| "ref_logps/rejected": -71.22503662109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.455106496810913, | |
| "rewards/margins": 14.509923934936523, | |
| "rewards/rejected": -11.054819107055664, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 4.755244755244756e-06, | |
| "logps/chosen": -96.92023468017578, | |
| "logps/rejected": -186.9994354248047, | |
| "loss": 0.0002, | |
| "losses/dpo": 0.00031228098669089377, | |
| "losses/sft": 0.5785849690437317, | |
| "losses/total": 0.00031228098669089377, | |
| "ref_logps/chosen": -129.39373779296875, | |
| "ref_logps/rejected": -72.0064468383789, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2473514080047607, | |
| "rewards/margins": 14.746650695800781, | |
| "rewards/rejected": -11.499299049377441, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 3.881118881118881e-06, | |
| "logps/chosen": -96.56753540039062, | |
| "logps/rejected": -188.7633819580078, | |
| "loss": 0.0003, | |
| "losses/dpo": 0.00019269342010375112, | |
| "losses/sft": 0.5762569904327393, | |
| "losses/total": 0.00019269342010375112, | |
| "ref_logps/chosen": -128.0542449951172, | |
| "ref_logps/rejected": -71.33090209960938, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.148669481277466, | |
| "rewards/margins": 14.891918182373047, | |
| "rewards/rejected": -11.743247985839844, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 3.006993006993007e-06, | |
| "logps/chosen": -97.47007751464844, | |
| "logps/rejected": -189.46685791015625, | |
| "loss": 0.0002, | |
| "losses/dpo": 0.00025137903867289424, | |
| "losses/sft": 0.5845997333526611, | |
| "losses/total": 0.00025137903867289424, | |
| "ref_logps/chosen": -128.27481079101562, | |
| "ref_logps/rejected": -71.0475082397461, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0804734230041504, | |
| "rewards/margins": 14.922408103942871, | |
| "rewards/rejected": -11.841936111450195, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 2.132867132867133e-06, | |
| "logps/chosen": -97.53392791748047, | |
| "logps/rejected": -190.3184356689453, | |
| "loss": 0.0003, | |
| "losses/dpo": 0.00022768642520532012, | |
| "losses/sft": 0.5818451642990112, | |
| "losses/total": 0.00022768642520532012, | |
| "ref_logps/chosen": -127.79641723632812, | |
| "ref_logps/rejected": -71.30667877197266, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0262484550476074, | |
| "rewards/margins": 14.927424430847168, | |
| "rewards/rejected": -11.901176452636719, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.258741258741259e-06, | |
| "logps/chosen": -98.7781753540039, | |
| "logps/rejected": -194.68101501464844, | |
| "loss": 0.0002, | |
| "losses/dpo": 0.00027412467170506716, | |
| "losses/sft": 0.5883935689926147, | |
| "losses/total": 0.00027412467170506716, | |
| "ref_logps/chosen": -129.11810302734375, | |
| "ref_logps/rejected": -72.69854736328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0339913368225098, | |
| "rewards/margins": 15.232237815856934, | |
| "rewards/rejected": -12.198246955871582, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 3.846153846153847e-07, | |
| "logps/chosen": -99.09100341796875, | |
| "logps/rejected": -193.7891387939453, | |
| "loss": 0.0002, | |
| "losses/dpo": 0.0002839878143277019, | |
| "losses/sft": 0.5941969752311707, | |
| "losses/total": 0.0002839878143277019, | |
| "ref_logps/chosen": -129.47601318359375, | |
| "ref_logps/rejected": -71.78772735595703, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0384998321533203, | |
| "rewards/margins": 15.238642692565918, | |
| "rewards/rejected": -12.200141906738281, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 636, | |
| "total_flos": 0.0, | |
| "train_loss": 0.02455043116600528, | |
| "train_runtime": 18942.3632, | |
| "train_samples_per_second": 2.686, | |
| "train_steps_per_second": 0.034 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 636, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |