{ "best_global_step": 1100, "best_metric": 0.07910037785768509, "best_model_checkpoint": "OpenAHA-Calvin-2p-3b/checkpoint-1100", "epoch": 2.9121644017235666, "eval_steps": 100, "global_step": 1100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2651640702684786, "eval_loss": 0.1917700171470642, "eval_runtime": 35.1714, "eval_samples_per_second": 5.772, "eval_steps_per_second": 0.739, "step": 100 }, { "epoch": 0.5303281405369572, "eval_loss": 0.13455702364444733, "eval_runtime": 33.0746, "eval_samples_per_second": 6.138, "eval_steps_per_second": 0.786, "step": 200 }, { "epoch": 0.7954922108054359, "eval_loss": 0.1287507265806198, "eval_runtime": 36.6447, "eval_samples_per_second": 5.54, "eval_steps_per_second": 0.71, "step": 300 }, { "epoch": 1.0583360954590653, "eval_loss": 0.10800737142562866, "eval_runtime": 33.0659, "eval_samples_per_second": 6.139, "eval_steps_per_second": 0.786, "step": 400 }, { "epoch": 1.3235001657275438, "grad_norm": 2.1960959434509277, "learning_rate": 1.2450980392156864e-05, "loss": 0.4408, "step": 500 }, { "epoch": 1.3235001657275438, "eval_loss": 0.10127653181552887, "eval_runtime": 32.9484, "eval_samples_per_second": 6.161, "eval_steps_per_second": 0.789, "step": 500 }, { "epoch": 1.5886642359960226, "eval_loss": 0.10220777243375778, "eval_runtime": 33.1253, "eval_samples_per_second": 6.128, "eval_steps_per_second": 0.785, "step": 600 }, { "epoch": 1.8538283062645011, "eval_loss": 0.09732703119516373, "eval_runtime": 32.9224, "eval_samples_per_second": 6.166, "eval_steps_per_second": 0.79, "step": 700 }, { "epoch": 2.1166721909181305, "eval_loss": 0.08920517563819885, "eval_runtime": 35.8547, "eval_samples_per_second": 5.662, "eval_steps_per_second": 0.725, "step": 800 }, { "epoch": 2.381836261186609, "eval_loss": 0.0842105895280838, "eval_runtime": 33.201, "eval_samples_per_second": 6.114, "eval_steps_per_second": 0.783, "step": 900 }, { "epoch": 2.6470003314550876, "grad_norm": 1.400881290435791, "learning_rate": 2.647058823529412e-06, "loss": 0.0631, "step": 1000 }, { "epoch": 2.6470003314550876, "eval_loss": 0.08373343199491501, "eval_runtime": 33.2639, "eval_samples_per_second": 6.103, "eval_steps_per_second": 0.782, "step": 1000 }, { "epoch": 2.9121644017235666, "eval_loss": 0.07910037785768509, "eval_runtime": 33.3862, "eval_samples_per_second": 6.08, "eval_steps_per_second": 0.779, "step": 1100 } ], "logging_steps": 500, "max_steps": 1134, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0716799705164969e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }