| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9984, |
| "eval_steps": 500, |
| "global_step": 156, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.064, |
| "grad_norm": 51.604713439941406, |
| "learning_rate": 6.249999999999999e-07, |
| "logits/chosen": -1.7511365413665771, |
| "logits/rejected": -0.9332168102264404, |
| "logps/chosen": -326.5755615234375, |
| "logps/rejected": -795.309326171875, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.49687498807907104, |
| "rewards/chosen": 0.008899950422346592, |
| "rewards/margins": 0.04068336635828018, |
| "rewards/rejected": -0.031783413141965866, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 11.927907943725586, |
| "learning_rate": 9.979871469976195e-07, |
| "logits/chosen": -1.7664214372634888, |
| "logits/rejected": -0.9802268743515015, |
| "logps/chosen": -345.8614501953125, |
| "logps/rejected": -836.3780517578125, |
| "loss": 0.3867, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": 0.04140026494860649, |
| "rewards/margins": 1.189576506614685, |
| "rewards/rejected": -1.1481761932373047, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 0.19122953712940216, |
| "learning_rate": 9.755282581475767e-07, |
| "logits/chosen": -1.9365524053573608, |
| "logits/rejected": -1.328689694404602, |
| "logps/chosen": -346.8349914550781, |
| "logps/rejected": -943.77587890625, |
| "loss": 0.1303, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -3.2460498809814453, |
| "rewards/margins": 11.235220909118652, |
| "rewards/rejected": -14.481270790100098, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 0.22885994613170624, |
| "learning_rate": 9.29224396800933e-07, |
| "logits/chosen": -2.040452241897583, |
| "logits/rejected": -1.6386874914169312, |
| "logps/chosen": -419.212646484375, |
| "logps/rejected": -1117.9078369140625, |
| "loss": 0.0405, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -10.218693733215332, |
| "rewards/margins": 25.372825622558594, |
| "rewards/rejected": -35.591522216796875, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.15373322367668152, |
| "learning_rate": 8.613974319136957e-07, |
| "logits/chosen": -1.946674108505249, |
| "logits/rejected": -1.572014570236206, |
| "logps/chosen": -434.0313720703125, |
| "logps/rejected": -1180.6865234375, |
| "loss": 0.0349, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -9.76918888092041, |
| "rewards/margins": 27.693220138549805, |
| "rewards/rejected": -37.46240997314453, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 0.13283002376556396, |
| "learning_rate": 7.754484907260512e-07, |
| "logits/chosen": -1.8221015930175781, |
| "logits/rejected": -1.4241716861724854, |
| "logps/chosen": -397.850341796875, |
| "logps/rejected": -1054.3414306640625, |
| "loss": 0.032, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -6.713717937469482, |
| "rewards/margins": 22.15141487121582, |
| "rewards/rejected": -28.865131378173828, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 0.12337207049131393, |
| "learning_rate": 6.756874120406714e-07, |
| "logits/chosen": -1.7552038431167603, |
| "logits/rejected": -1.296088457107544, |
| "logps/chosen": -325.4990539550781, |
| "logps/rejected": -998.6246337890625, |
| "loss": 0.0336, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -3.70082950592041, |
| "rewards/margins": 19.856613159179688, |
| "rewards/rejected": -23.557445526123047, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 0.0956556648015976, |
| "learning_rate": 5.671166329088277e-07, |
| "logits/chosen": -1.6788297891616821, |
| "logits/rejected": -1.2116974592208862, |
| "logps/chosen": -347.7698974609375, |
| "logps/rejected": -1006.9019775390625, |
| "loss": 0.0304, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": -3.5100672245025635, |
| "rewards/margins": 18.646265029907227, |
| "rewards/rejected": -22.15633201599121, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 0.09139358252286911, |
| "learning_rate": 4.5518034554828327e-07, |
| "logits/chosen": -1.6748931407928467, |
| "logits/rejected": -1.1833109855651855, |
| "logps/chosen": -355.009033203125, |
| "logps/rejected": -1040.528076171875, |
| "loss": 0.036, |
| "rewards/accuracies": 0.981249988079071, |
| "rewards/chosen": -3.3938891887664795, |
| "rewards/margins": 18.697372436523438, |
| "rewards/rejected": -22.09126091003418, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 0.10561755299568176, |
| "learning_rate": 3.454915028125263e-07, |
| "logits/chosen": -1.6425403356552124, |
| "logits/rejected": -1.1396013498306274, |
| "logps/chosen": -336.59295654296875, |
| "logps/rejected": -1013.5849609375, |
| "loss": 0.026, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -3.053579092025757, |
| "rewards/margins": 18.989028930664062, |
| "rewards/rejected": -22.0426082611084, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 0.44353532791137695, |
| "learning_rate": 2.4355036129704696e-07, |
| "logits/chosen": -1.6363352537155151, |
| "logits/rejected": -1.1411080360412598, |
| "logps/chosen": -348.1735534667969, |
| "logps/rejected": -1026.014404296875, |
| "loss": 0.0354, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.1121909618377686, |
| "rewards/margins": 18.843551635742188, |
| "rewards/rejected": -21.95574378967285, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 0.30070415139198303, |
| "learning_rate": 1.5446867550656767e-07, |
| "logits/chosen": -1.5898562669754028, |
| "logits/rejected": -1.1400786638259888, |
| "logps/chosen": -379.26361083984375, |
| "logps/rejected": -996.3677978515625, |
| "loss": 0.0419, |
| "rewards/accuracies": 0.984375, |
| "rewards/chosen": -3.5438451766967773, |
| "rewards/margins": 17.603504180908203, |
| "rewards/rejected": -21.147350311279297, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 0.4903264343738556, |
| "learning_rate": 8.271337313934867e-08, |
| "logits/chosen": -1.6253430843353271, |
| "logits/rejected": -1.1439229249954224, |
| "logps/chosen": -365.46771240234375, |
| "logps/rejected": -1015.4036254882812, |
| "loss": 0.031, |
| "rewards/accuracies": 0.9906249642372131, |
| "rewards/chosen": -3.238945245742798, |
| "rewards/margins": 18.085920333862305, |
| "rewards/rejected": -21.324867248535156, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 0.07947251945734024, |
| "learning_rate": 3.188256468013139e-08, |
| "logits/chosen": -1.650820255279541, |
| "logits/rejected": -1.1272088289260864, |
| "logps/chosen": -320.8426208496094, |
| "logps/rejected": -1027.0377197265625, |
| "loss": 0.0271, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": -2.4786899089813232, |
| "rewards/margins": 19.36054801940918, |
| "rewards/rejected": -21.839237213134766, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 0.07403174042701721, |
| "learning_rate": 4.5251191160326495e-09, |
| "logits/chosen": -1.6176533699035645, |
| "logits/rejected": -1.134464979171753, |
| "logps/chosen": -357.2913513183594, |
| "logps/rejected": -1029.54443359375, |
| "loss": 0.0324, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.00289249420166, |
| "rewards/margins": 18.549577713012695, |
| "rewards/rejected": -21.552471160888672, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9984, |
| "step": 156, |
| "total_flos": 1.1530176818095063e+18, |
| "train_loss": 0.1052116885399207, |
| "train_runtime": 6627.1574, |
| "train_samples_per_second": 0.754, |
| "train_steps_per_second": 0.024 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.1530176818095063e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|