| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9937888198757764, |
| "eval_steps": 20, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09937888198757763, |
| "grad_norm": 23.625, |
| "learning_rate": 4.967291771834727e-06, |
| "logits/chosen": -1.450693130493164, |
| "logits/rejected": -1.4506696462631226, |
| "logps/chosen": -300.4499206542969, |
| "logps/rejected": -581.7461547851562, |
| "loss": 0.5136, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.27882489562034607, |
| "rewards/margins": 1.4301296472549438, |
| "rewards/rejected": -1.7089545726776123, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.19875776397515527, |
| "grad_norm": 31.625, |
| "learning_rate": 4.710738726825059e-06, |
| "logits/chosen": -1.3673866987228394, |
| "logits/rejected": -1.3606162071228027, |
| "logps/chosen": -336.33209228515625, |
| "logps/rejected": -559.6798706054688, |
| "loss": 0.247, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -1.2435611486434937, |
| "rewards/margins": 3.834404468536377, |
| "rewards/rejected": -5.07796573638916, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.19875776397515527, |
| "eval_logits/chosen": -1.3263726234436035, |
| "eval_logits/rejected": -1.311964988708496, |
| "eval_logps/chosen": -339.557861328125, |
| "eval_logps/rejected": -699.66259765625, |
| "eval_loss": 0.1816757321357727, |
| "eval_rewards/accuracies": 0.9800000190734863, |
| "eval_rewards/chosen": -1.7551236152648926, |
| "eval_rewards/margins": 7.074146270751953, |
| "eval_rewards/rejected": -8.829269409179688, |
| "eval_runtime": 16.2646, |
| "eval_samples_per_second": 3.074, |
| "eval_steps_per_second": 3.074, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.2981366459627329, |
| "grad_norm": 48.5, |
| "learning_rate": 4.224309443195261e-06, |
| "logits/chosen": -1.2823044061660767, |
| "logits/rejected": -1.2803713083267212, |
| "logps/chosen": -333.9573669433594, |
| "logps/rejected": -675.1748046875, |
| "loss": 0.19, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.4483742713928223, |
| "rewards/margins": 7.259348392486572, |
| "rewards/rejected": -9.707722663879395, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.39751552795031053, |
| "grad_norm": 21.625, |
| "learning_rate": 3.5585836356437266e-06, |
| "logits/chosen": -1.260244369506836, |
| "logits/rejected": -1.258673071861267, |
| "logps/chosen": -440.227783203125, |
| "logps/rejected": -945.3692626953125, |
| "loss": 0.0878, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -4.9525017738342285, |
| "rewards/margins": 12.335118293762207, |
| "rewards/rejected": -17.28761863708496, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.39751552795031053, |
| "eval_logits/chosen": -1.2100083827972412, |
| "eval_logits/rejected": -1.2017772197723389, |
| "eval_logps/chosen": -406.1869201660156, |
| "eval_logps/rejected": -921.7108764648438, |
| "eval_loss": 0.0693456158041954, |
| "eval_rewards/accuracies": 0.9800000190734863, |
| "eval_rewards/chosen": -5.086578845977783, |
| "eval_rewards/margins": 14.845105171203613, |
| "eval_rewards/rejected": -19.931682586669922, |
| "eval_runtime": 16.3825, |
| "eval_samples_per_second": 3.052, |
| "eval_steps_per_second": 3.052, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4968944099378882, |
| "grad_norm": 14.375, |
| "learning_rate": 2.7827845654882112e-06, |
| "logits/chosen": -1.1697198152542114, |
| "logits/rejected": -1.174869418144226, |
| "logps/chosen": -406.47735595703125, |
| "logps/rejected": -870.3468627929688, |
| "loss": 0.0755, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -6.050213813781738, |
| "rewards/margins": 12.730817794799805, |
| "rewards/rejected": -18.78103256225586, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5962732919254659, |
| "grad_norm": 8.6875, |
| "learning_rate": 1.9775810924218126e-06, |
| "logits/chosen": -1.1027421951293945, |
| "logits/rejected": -1.1014692783355713, |
| "logps/chosen": -458.50201416015625, |
| "logps/rejected": -897.8450927734375, |
| "loss": 0.0552, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.981372833251953, |
| "rewards/margins": 14.360372543334961, |
| "rewards/rejected": -22.341745376586914, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.5962732919254659, |
| "eval_logits/chosen": -1.0882357358932495, |
| "eval_logits/rejected": -1.088262677192688, |
| "eval_logps/chosen": -476.73382568359375, |
| "eval_logps/rejected": -1135.177734375, |
| "eval_loss": 0.037782467901706696, |
| "eval_rewards/accuracies": 0.9800000190734863, |
| "eval_rewards/chosen": -8.613921165466309, |
| "eval_rewards/margins": 21.991100311279297, |
| "eval_rewards/rejected": -30.605024337768555, |
| "eval_runtime": 16.2315, |
| "eval_samples_per_second": 3.08, |
| "eval_steps_per_second": 3.08, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 8.1875, |
| "learning_rate": 1.2266995941780934e-06, |
| "logits/chosen": -1.077918291091919, |
| "logits/rejected": -1.073142170906067, |
| "logps/chosen": -450.880859375, |
| "logps/rejected": -798.1456298828125, |
| "loss": 0.0748, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -8.840319633483887, |
| "rewards/margins": 12.28600025177002, |
| "rewards/rejected": -21.12632179260254, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7950310559006211, |
| "grad_norm": 13.3125, |
| "learning_rate": 6.082179604557617e-07, |
| "logits/chosen": -1.0586068630218506, |
| "logits/rejected": -1.0654761791229248, |
| "logps/chosen": -583.5306396484375, |
| "logps/rejected": -1094.6214599609375, |
| "loss": 0.0361, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -11.453761100769043, |
| "rewards/margins": 17.418289184570312, |
| "rewards/rejected": -28.87204933166504, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.7950310559006211, |
| "eval_logits/chosen": -1.056160569190979, |
| "eval_logits/rejected": -1.0596200227737427, |
| "eval_logps/chosen": -494.7749938964844, |
| "eval_logps/rejected": -1187.513427734375, |
| "eval_loss": 0.03448532894253731, |
| "eval_rewards/accuracies": 0.9800000190734863, |
| "eval_rewards/chosen": -9.515981674194336, |
| "eval_rewards/margins": 23.70583724975586, |
| "eval_rewards/rejected": -33.22181701660156, |
| "eval_runtime": 16.2498, |
| "eval_samples_per_second": 3.077, |
| "eval_steps_per_second": 3.077, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8944099378881988, |
| "grad_norm": 10.8125, |
| "learning_rate": 1.864469258932397e-07, |
| "logits/chosen": -1.0640597343444824, |
| "logits/rejected": -1.0643057823181152, |
| "logps/chosen": -545.856689453125, |
| "logps/rejected": -1039.5836181640625, |
| "loss": 0.0496, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -10.582590103149414, |
| "rewards/margins": 17.025867462158203, |
| "rewards/rejected": -27.608455657958984, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.9937888198757764, |
| "grad_norm": 16.75, |
| "learning_rate": 5.242934405720879e-09, |
| "logits/chosen": -1.025246500968933, |
| "logits/rejected": -1.0300629138946533, |
| "logps/chosen": -522.3057861328125, |
| "logps/rejected": -1139.15283203125, |
| "loss": 0.0586, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -10.60677433013916, |
| "rewards/margins": 20.42071533203125, |
| "rewards/rejected": -31.027490615844727, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9937888198757764, |
| "eval_logits/chosen": -1.0552905797958374, |
| "eval_logits/rejected": -1.0563225746154785, |
| "eval_logps/chosen": -495.5924072265625, |
| "eval_logps/rejected": -1189.4423828125, |
| "eval_loss": 0.0349690206348896, |
| "eval_rewards/accuracies": 0.9800000190734863, |
| "eval_rewards/chosen": -9.556852340698242, |
| "eval_rewards/margins": 23.761411666870117, |
| "eval_rewards/rejected": -33.318260192871094, |
| "eval_runtime": 16.2372, |
| "eval_samples_per_second": 3.079, |
| "eval_steps_per_second": 3.079, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 101, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|