{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "global_step": 6480,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 5.858560562133789,
      "eval_runtime": 5.0347,
      "eval_samples_per_second": 2349.702,
      "eval_steps_per_second": 2.383,
      "step": 216
    },
    {
      "epoch": 2.0,
      "eval_loss": 5.509454727172852,
      "eval_runtime": 4.6188,
      "eval_samples_per_second": 2561.29,
      "eval_steps_per_second": 2.598,
      "step": 432
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.0001,
      "loss": 6.688,
      "step": 500
    },
    {
      "epoch": 3.0,
      "eval_loss": 5.397603511810303,
      "eval_runtime": 4.6394,
      "eval_samples_per_second": 2549.872,
      "eval_steps_per_second": 2.587,
      "step": 648
    },
    {
      "epoch": 4.0,
      "eval_loss": 5.356218338012695,
      "eval_runtime": 4.6644,
      "eval_samples_per_second": 2536.231,
      "eval_steps_per_second": 2.573,
      "step": 864
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.0002,
      "loss": 5.3629,
      "step": 1000
    },
    {
      "epoch": 5.0,
      "eval_loss": 5.291167736053467,
      "eval_runtime": 4.6079,
      "eval_samples_per_second": 2567.32,
      "eval_steps_per_second": 2.604,
      "step": 1080
    },
    {
      "epoch": 6.0,
      "eval_loss": 5.238525867462158,
      "eval_runtime": 4.628,
      "eval_samples_per_second": 2556.182,
      "eval_steps_per_second": 2.593,
      "step": 1296
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.00018175182481751826,
      "loss": 5.22,
      "step": 1500
    },
    {
      "epoch": 7.0,
      "eval_loss": 5.195512771606445,
      "eval_runtime": 4.6676,
      "eval_samples_per_second": 2534.484,
      "eval_steps_per_second": 2.571,
      "step": 1512
    },
    {
      "epoch": 8.0,
      "eval_loss": 5.178501605987549,
      "eval_runtime": 4.6613,
      "eval_samples_per_second": 2537.906,
      "eval_steps_per_second": 2.574,
      "step": 1728
    },
    {
      "epoch": 9.0,
      "eval_loss": 5.132693767547607,
      "eval_runtime": 4.6567,
      "eval_samples_per_second": 2540.411,
      "eval_steps_per_second": 2.577,
      "step": 1944
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.0001635036496350365,
      "loss": 5.1248,
      "step": 2000
    },
    {
      "epoch": 10.0,
      "eval_loss": 5.1242523193359375,
      "eval_runtime": 4.6307,
      "eval_samples_per_second": 2554.696,
      "eval_steps_per_second": 2.591,
      "step": 2160
    },
    {
      "epoch": 11.0,
      "eval_loss": 5.0888824462890625,
      "eval_runtime": 4.6284,
      "eval_samples_per_second": 2555.934,
      "eval_steps_per_second": 2.593,
      "step": 2376
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.00014525547445255475,
      "loss": 5.0591,
      "step": 2500
    },
    {
      "epoch": 12.0,
      "eval_loss": 5.073211669921875,
      "eval_runtime": 4.7033,
      "eval_samples_per_second": 2515.279,
      "eval_steps_per_second": 2.551,
      "step": 2592
    },
    {
      "epoch": 13.0,
      "eval_loss": 5.041653633117676,
      "eval_runtime": 4.6143,
      "eval_samples_per_second": 2563.795,
      "eval_steps_per_second": 2.601,
      "step": 2808
    },
    {
      "epoch": 13.89,
      "learning_rate": 0.000127007299270073,
      "loss": 5.0094,
      "step": 3000
    },
    {
      "epoch": 14.0,
      "eval_loss": 5.038762092590332,
      "eval_runtime": 4.6173,
      "eval_samples_per_second": 2562.095,
      "eval_steps_per_second": 2.599,
      "step": 3024
    },
    {
      "epoch": 15.0,
      "eval_loss": 4.9298810958862305,
      "eval_runtime": 4.5819,
      "eval_samples_per_second": 2581.88,
      "eval_steps_per_second": 2.619,
      "step": 3240
    },
    {
      "epoch": 16.0,
      "eval_loss": 4.299057960510254,
      "eval_runtime": 4.6127,
      "eval_samples_per_second": 2564.64,
      "eval_steps_per_second": 2.601,
      "step": 3456
    },
    {
      "epoch": 16.2,
      "learning_rate": 0.00010875912408759123,
      "loss": 4.7527,
      "step": 3500
    },
    {
      "epoch": 17.0,
      "eval_loss": 3.654055118560791,
      "eval_runtime": 4.5888,
      "eval_samples_per_second": 2578.023,
      "eval_steps_per_second": 2.615,
      "step": 3672
    },
    {
      "epoch": 18.0,
      "eval_loss": 2.7825753688812256,
      "eval_runtime": 4.6393,
      "eval_samples_per_second": 2549.967,
      "eval_steps_per_second": 2.587,
      "step": 3888
    },
    {
      "epoch": 18.52,
      "learning_rate": 9.051094890510949e-05,
      "loss": 3.4431,
      "step": 4000
    },
    {
      "epoch": 19.0,
      "eval_loss": 2.2795569896698,
      "eval_runtime": 4.647,
      "eval_samples_per_second": 2545.709,
      "eval_steps_per_second": 2.582,
      "step": 4104
    },
    {
      "epoch": 20.0,
      "eval_loss": 2.021310806274414,
      "eval_runtime": 4.6303,
      "eval_samples_per_second": 2554.922,
      "eval_steps_per_second": 2.592,
      "step": 4320
    },
    {
      "epoch": 20.83,
      "learning_rate": 7.226277372262774e-05,
      "loss": 2.2803,
      "step": 4500
    },
    {
      "epoch": 21.0,
      "eval_loss": 1.8808549642562866,
      "eval_runtime": 4.6167,
      "eval_samples_per_second": 2562.421,
      "eval_steps_per_second": 2.599,
      "step": 4536
    },
    {
      "epoch": 22.0,
      "eval_loss": 1.7615374326705933,
      "eval_runtime": 4.6259,
      "eval_samples_per_second": 2557.316,
      "eval_steps_per_second": 2.594,
      "step": 4752
    },
    {
      "epoch": 23.0,
      "eval_loss": 1.6925297975540161,
      "eval_runtime": 4.6567,
      "eval_samples_per_second": 2540.444,
      "eval_steps_per_second": 2.577,
      "step": 4968
    },
    {
      "epoch": 23.15,
      "learning_rate": 5.401459854014599e-05,
      "loss": 1.8601,
      "step": 5000
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.6204941272735596,
      "eval_runtime": 4.7455,
      "eval_samples_per_second": 2492.914,
      "eval_steps_per_second": 2.529,
      "step": 5184
    },
    {
      "epoch": 25.0,
      "eval_loss": 1.5750768184661865,
      "eval_runtime": 4.6733,
      "eval_samples_per_second": 2531.399,
      "eval_steps_per_second": 2.568,
      "step": 5400
    },
    {
      "epoch": 25.46,
      "learning_rate": 3.5766423357664236e-05,
      "loss": 1.6697,
      "step": 5500
    },
    {
      "epoch": 26.0,
      "eval_loss": 1.5390561819076538,
      "eval_runtime": 4.6271,
      "eval_samples_per_second": 2556.66,
      "eval_steps_per_second": 2.593,
      "step": 5616
    },
    {
      "epoch": 27.0,
      "eval_loss": 1.520015835762024,
      "eval_runtime": 4.6894,
      "eval_samples_per_second": 2522.687,
      "eval_steps_per_second": 2.559,
      "step": 5832
    },
    {
      "epoch": 27.78,
      "learning_rate": 1.7518248175182482e-05,
      "loss": 1.5655,
      "step": 6000
    },
    {
      "epoch": 28.0,
      "eval_loss": 1.4865714311599731,
      "eval_runtime": 4.6379,
      "eval_samples_per_second": 2550.726,
      "eval_steps_per_second": 2.587,
      "step": 6048
    },
    {
      "epoch": 29.0,
      "eval_loss": 1.4655797481536865,
      "eval_runtime": 4.6124,
      "eval_samples_per_second": 2564.811,
      "eval_steps_per_second": 2.602,
      "step": 6264
    },
    {
      "epoch": 30.0,
      "eval_loss": 1.4627275466918945,
      "eval_runtime": 4.9986,
      "eval_samples_per_second": 2366.681,
      "eval_steps_per_second": 2.401,
      "step": 6480
    }
  ],
  "max_steps": 6480,
  "num_train_epochs": 30,
  "total_flos": 5.459072646905856e+16,
  "trial_name": null,
  "trial_params": null
}
|
|