{
  "best_global_step": 1053,
  "best_metric": 0.5389207005500793,
  "best_model_checkpoint": "./mcqa_model_test_with_pref/checkpoint-1053",
  "epoch": 0.9995253915519696,
  "eval_steps": 500,
  "global_step": 1053,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04746084480303749,
      "grad_norm": 49.358665466308594,
      "learning_rate": 4.150943396226415e-07,
      "loss": 0.7352,
      "step": 50
    },
    {
      "epoch": 0.09492168960607499,
      "grad_norm": 74.5323486328125,
      "learning_rate": 8.867924528301887e-07,
      "loss": 0.6636,
      "step": 100
    },
    {
      "epoch": 0.14238253440911247,
      "grad_norm": 61.94378662109375,
      "learning_rate": 9.598732840549102e-07,
      "loss": 0.6403,
      "step": 150
    },
    {
      "epoch": 0.18984337921214997,
      "grad_norm": 58.84312057495117,
      "learning_rate": 9.070749736008447e-07,
      "loss": 0.5677,
      "step": 200
    },
    {
      "epoch": 0.23730422401518747,
      "grad_norm": 80.92351531982422,
      "learning_rate": 8.542766631467792e-07,
      "loss": 0.6037,
      "step": 250
    },
    {
      "epoch": 0.28476506881822494,
      "grad_norm": 73.99102020263672,
      "learning_rate": 8.014783526927138e-07,
      "loss": 0.566,
      "step": 300
    },
    {
      "epoch": 0.33222591362126247,
      "grad_norm": 64.94400787353516,
      "learning_rate": 7.486800422386483e-07,
      "loss": 0.5833,
      "step": 350
    },
    {
      "epoch": 0.37968675842429994,
      "grad_norm": 71.62226867675781,
      "learning_rate": 6.958817317845829e-07,
      "loss": 0.6499,
      "step": 400
    },
    {
      "epoch": 0.42714760322733747,
      "grad_norm": 71.14134216308594,
      "learning_rate": 6.430834213305174e-07,
      "loss": 0.5799,
      "step": 450
    },
    {
      "epoch": 0.47460844803037494,
      "grad_norm": 44.287513732910156,
      "learning_rate": 5.90285110876452e-07,
      "loss": 0.5158,
      "step": 500
    },
    {
      "epoch": 0.5220692928334124,
      "grad_norm": 62.8205451965332,
      "learning_rate": 5.374868004223864e-07,
      "loss": 0.5618,
      "step": 550
    },
    {
      "epoch": 0.5695301376364499,
      "grad_norm": 77.03436279296875,
      "learning_rate": 4.84688489968321e-07,
      "loss": 0.5534,
      "step": 600
    },
    {
      "epoch": 0.6169909824394875,
      "grad_norm": 70.07403564453125,
      "learning_rate": 4.3189017951425557e-07,
      "loss": 0.6259,
      "step": 650
    },
    {
      "epoch": 0.6644518272425249,
      "grad_norm": 40.0705680847168,
      "learning_rate": 3.790918690601901e-07,
      "loss": 0.6029,
      "step": 700
    },
    {
      "epoch": 0.7119126720455624,
      "grad_norm": 61.24772262573242,
      "learning_rate": 3.262935586061246e-07,
      "loss": 0.5652,
      "step": 750
    },
    {
      "epoch": 0.7593735168485999,
      "grad_norm": 64.95348358154297,
      "learning_rate": 2.734952481520591e-07,
      "loss": 0.5716,
      "step": 800
    },
    {
      "epoch": 0.8068343616516374,
      "grad_norm": 66.72836303710938,
      "learning_rate": 2.2069693769799365e-07,
      "loss": 0.582,
      "step": 850
    },
    {
      "epoch": 0.8542952064546749,
      "grad_norm": 45.734771728515625,
      "learning_rate": 1.678986272439282e-07,
      "loss": 0.5445,
      "step": 900
    },
    {
      "epoch": 0.9017560512577124,
      "grad_norm": 77.45156860351562,
      "learning_rate": 1.1510031678986272e-07,
      "loss": 0.5145,
      "step": 950
    },
    {
      "epoch": 0.9492168960607499,
      "grad_norm": 63.75503158569336,
      "learning_rate": 6.230200633579724e-08,
      "loss": 0.5895,
      "step": 1000
    },
    {
      "epoch": 0.9966777408637874,
      "grad_norm": 59.99051284790039,
      "learning_rate": 9.503695881731783e-09,
      "loss": 0.5374,
      "step": 1050
    },
    {
      "epoch": 0.9995253915519696,
      "eval_loss": 0.5389207005500793,
      "eval_runtime": 70.1537,
      "eval_samples_per_second": 45.03,
      "eval_steps_per_second": 5.63,
      "step": 1053
    }
  ],
  "logging_steps": 50,
  "max_steps": 1053,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8014675354583040.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}