{ "best_metric": null, "best_model_checkpoint": null, "epoch": 22.44488977955912, "eval_steps": 100, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8, "eval_loss": 1.9484879970550537, "eval_runtime": 80.1777, "eval_samples_per_second": 2.769, "eval_steps_per_second": 0.349, "step": 100 }, { "epoch": 1.6, "eval_loss": 1.9194128513336182, "eval_runtime": 75.485, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.371, "step": 200 }, { "epoch": 2.4, "eval_loss": 1.9068584442138672, "eval_runtime": 75.624, "eval_samples_per_second": 2.936, "eval_steps_per_second": 0.37, "step": 300 }, { "epoch": 3.21, "eval_loss": 1.8953101634979248, "eval_runtime": 75.4707, "eval_samples_per_second": 2.942, "eval_steps_per_second": 0.371, "step": 400 }, { "epoch": 4.01, "learning_rate": 2.08611481975968e-05, "loss": 1.9094, "step": 500 }, { "epoch": 4.01, "eval_loss": 1.8837881088256836, "eval_runtime": 75.6492, "eval_samples_per_second": 2.935, "eval_steps_per_second": 0.37, "step": 500 }, { "epoch": 4.81, "eval_loss": 1.8737961053848267, "eval_runtime": 75.486, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.371, "step": 600 }, { "epoch": 5.61, "eval_loss": 1.8641096353530884, "eval_runtime": 75.4927, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.371, "step": 700 }, { "epoch": 6.41, "eval_loss": 1.8550605773925781, "eval_runtime": 75.4778, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.371, "step": 800 }, { "epoch": 7.21, "eval_loss": 1.8458194732666016, "eval_runtime": 75.4994, "eval_samples_per_second": 2.94, "eval_steps_per_second": 0.371, "step": 900 }, { "epoch": 8.02, "learning_rate": 1.6688918558077437e-05, "loss": 1.8138, "step": 1000 }, { "epoch": 8.02, "eval_loss": 1.837294340133667, "eval_runtime": 75.3775, "eval_samples_per_second": 2.945, "eval_steps_per_second": 0.371, "step": 1000 }, { "epoch": 8.82, "eval_loss": 1.8297418355941772, "eval_runtime": 75.4862, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.371, "step": 1100 }, { "epoch": 9.62, "eval_loss": 1.8239439725875854, "eval_runtime": 75.4899, "eval_samples_per_second": 2.941, "eval_steps_per_second": 0.371, "step": 1200 }, { "epoch": 10.42, "eval_loss": 1.8179348707199097, "eval_runtime": 75.4662, "eval_samples_per_second": 2.942, "eval_steps_per_second": 0.371, "step": 1300 }, { "epoch": 11.22, "eval_loss": 1.8130933046340942, "eval_runtime": 75.5064, "eval_samples_per_second": 2.94, "eval_steps_per_second": 0.371, "step": 1400 }, { "epoch": 12.02, "learning_rate": 1.2516688918558078e-05, "loss": 1.7439, "step": 1500 }, { "epoch": 12.02, "eval_loss": 1.8075053691864014, "eval_runtime": 75.671, "eval_samples_per_second": 2.934, "eval_steps_per_second": 0.37, "step": 1500 }, { "epoch": 12.83, "eval_loss": 1.8044650554656982, "eval_runtime": 75.6414, "eval_samples_per_second": 2.935, "eval_steps_per_second": 0.37, "step": 1600 }, { "epoch": 13.63, "eval_loss": 1.8003138303756714, "eval_runtime": 75.6422, "eval_samples_per_second": 2.935, "eval_steps_per_second": 0.37, "step": 1700 }, { "epoch": 14.43, "eval_loss": 1.7982312440872192, "eval_runtime": 75.6665, "eval_samples_per_second": 2.934, "eval_steps_per_second": 0.37, "step": 1800 }, { "epoch": 15.23, "eval_loss": 1.7970277070999146, "eval_runtime": 75.6383, "eval_samples_per_second": 2.935, "eval_steps_per_second": 0.37, "step": 1900 }, { "epoch": 16.03, "learning_rate": 8.344459279038718e-06, "loss": 1.7005, "step": 2000 }, { "epoch": 16.03, "eval_loss": 1.7927231788635254, "eval_runtime": 78.7098, "eval_samples_per_second": 2.82, "eval_steps_per_second": 0.356, "step": 2000 }, { "epoch": 16.83, "eval_loss": 1.790854573249817, "eval_runtime": 77.0191, "eval_samples_per_second": 2.882, "eval_steps_per_second": 0.364, "step": 2100 }, { "epoch": 17.64, "eval_loss": 1.7891895771026611, "eval_runtime": 76.0084, "eval_samples_per_second": 2.921, "eval_steps_per_second": 0.368, "step": 2200 }, { "epoch": 18.44, "eval_loss": 1.7889128923416138, "eval_runtime": 75.6093, "eval_samples_per_second": 2.936, "eval_steps_per_second": 0.37, "step": 2300 }, { "epoch": 19.24, "eval_loss": 1.788590431213379, "eval_runtime": 75.7303, "eval_samples_per_second": 2.931, "eval_steps_per_second": 0.37, "step": 2400 }, { "epoch": 20.04, "learning_rate": 4.172229639519359e-06, "loss": 1.6705, "step": 2500 }, { "epoch": 20.04, "eval_loss": 1.7878992557525635, "eval_runtime": 75.5956, "eval_samples_per_second": 2.937, "eval_steps_per_second": 0.37, "step": 2500 }, { "epoch": 20.84, "eval_loss": 1.7869019508361816, "eval_runtime": 75.569, "eval_samples_per_second": 2.938, "eval_steps_per_second": 0.371, "step": 2600 }, { "epoch": 21.64, "eval_loss": 1.7858072519302368, "eval_runtime": 75.7601, "eval_samples_per_second": 2.93, "eval_steps_per_second": 0.37, "step": 2700 }, { "epoch": 22.44, "eval_loss": 1.7860134840011597, "eval_runtime": 75.7457, "eval_samples_per_second": 2.931, "eval_steps_per_second": 0.37, "step": 2800 } ], "logging_steps": 500, "max_steps": 3000, "num_train_epochs": 25, "save_steps": 100, "total_flos": 3.543009935795159e+18, "trial_name": null, "trial_params": null }