{ "best_metric": null, "best_model_checkpoint": null, "epoch": 16.0, "global_step": 16800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 5e-05, "loss": 1.7753, "step": 128 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 1.4026, "step": 256 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 1.3331, "step": 384 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 1.3015, "step": 512 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 1.2594, "step": 640 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 1.2452, "step": 768 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 1.2219, "step": 896 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 1.2057, "step": 1024 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 1.086, "step": 1152 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 1.046, "step": 1280 }, { "epoch": 1.34, "learning_rate": 5e-05, "loss": 1.0642, "step": 1408 }, { "epoch": 1.46, "learning_rate": 5e-05, "loss": 1.0602, "step": 1536 }, { "epoch": 1.58, "learning_rate": 5e-05, "loss": 1.0552, "step": 1664 }, { "epoch": 1.71, "learning_rate": 5e-05, "loss": 1.0565, "step": 1792 }, { "epoch": 1.83, "learning_rate": 5e-05, "loss": 1.0602, "step": 1920 }, { "epoch": 1.95, "learning_rate": 5e-05, "loss": 1.0534, "step": 2048 }, { "epoch": 2.07, "learning_rate": 5e-05, "loss": 0.9373, "step": 2176 }, { "epoch": 2.19, "learning_rate": 5e-05, "loss": 0.8638, "step": 2304 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 0.8759, "step": 2432 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 0.8775, "step": 2560 }, { "epoch": 2.56, "learning_rate": 5e-05, "loss": 0.8906, "step": 2688 }, { "epoch": 2.68, "learning_rate": 5e-05, "loss": 0.8947, "step": 2816 }, { "epoch": 2.8, "learning_rate": 5e-05, "loss": 0.8846, "step": 2944 }, { "epoch": 2.93, "learning_rate": 5e-05, "loss": 0.8891, "step": 3072 }, { "epoch": 3.05, "learning_rate": 5e-05, "loss": 0.8039, "step": 3200 }, { "epoch": 3.17, "learning_rate": 5e-05, "loss": 0.6736, "step": 3328 }, { "epoch": 3.29, "learning_rate": 5e-05, "loss": 0.6814, "step": 3456 }, { "epoch": 3.41, "learning_rate": 5e-05, "loss": 0.7016, "step": 3584 }, { "epoch": 3.54, "learning_rate": 5e-05, "loss": 0.7039, "step": 3712 }, { "epoch": 3.66, "learning_rate": 5e-05, "loss": 0.708, "step": 3840 }, { "epoch": 3.78, "learning_rate": 5e-05, "loss": 0.7175, "step": 3968 }, { "epoch": 3.9, "learning_rate": 5e-05, "loss": 0.7214, "step": 4096 }, { "epoch": 4.02, "learning_rate": 5e-05, "loss": 0.6764, "step": 4224 }, { "epoch": 4.14, "learning_rate": 5e-05, "loss": 0.4891, "step": 4352 }, { "epoch": 4.27, "learning_rate": 5e-05, "loss": 0.5006, "step": 4480 }, { "epoch": 4.39, "learning_rate": 5e-05, "loss": 0.5129, "step": 4608 }, { "epoch": 4.51, "learning_rate": 5e-05, "loss": 0.5221, "step": 4736 }, { "epoch": 4.63, "learning_rate": 5e-05, "loss": 0.5278, "step": 4864 }, { "epoch": 4.75, "learning_rate": 5e-05, "loss": 0.5413, "step": 4992 }, { "epoch": 4.88, "learning_rate": 5e-05, "loss": 0.5393, "step": 5120 }, { "epoch": 5.0, "learning_rate": 5e-05, "loss": 0.5398, "step": 5248 }, { "epoch": 5.12, "learning_rate": 5e-05, "loss": 0.3369, "step": 5376 }, { "epoch": 5.24, "learning_rate": 5e-05, "loss": 0.3417, "step": 5504 }, { "epoch": 5.36, "learning_rate": 5e-05, "loss": 0.3502, "step": 5632 }, { "epoch": 5.49, "learning_rate": 5e-05, "loss": 0.3593, "step": 5760 }, { "epoch": 5.61, "learning_rate": 5e-05, "loss": 0.3695, "step": 5888 }, { "epoch": 5.73, "learning_rate": 5e-05, "loss": 0.3764, "step": 6016 }, { "epoch": 5.85, "learning_rate": 5e-05, "loss": 0.3831, "step": 6144 }, { "epoch": 5.97, "learning_rate": 5e-05, "loss": 0.3891, "step": 6272 }, { "epoch": 6.1, "learning_rate": 5e-05, "loss": 0.2571, "step": 6400 }, { "epoch": 6.22, "learning_rate": 5e-05, "loss": 0.2232, "step": 6528 }, { "epoch": 6.34, "learning_rate": 5e-05, "loss": 0.2324, "step": 6656 }, { "epoch": 6.46, "learning_rate": 5e-05, "loss": 0.2421, "step": 6784 }, { "epoch": 6.58, "learning_rate": 5e-05, "loss": 0.2479, "step": 6912 }, { "epoch": 6.7, "learning_rate": 5e-05, "loss": 0.2584, "step": 7040 }, { "epoch": 6.83, "learning_rate": 5e-05, "loss": 0.2642, "step": 7168 }, { "epoch": 6.95, "learning_rate": 5e-05, "loss": 0.2706, "step": 7296 }, { "epoch": 7.07, "learning_rate": 5e-05, "loss": 0.203, "step": 7424 }, { "epoch": 7.19, "learning_rate": 5e-05, "loss": 0.1488, "step": 7552 }, { "epoch": 7.31, "learning_rate": 5e-05, "loss": 0.1654, "step": 7680 }, { "epoch": 7.44, "learning_rate": 5e-05, "loss": 0.1706, "step": 7808 }, { "epoch": 7.56, "learning_rate": 5e-05, "loss": 0.1799, "step": 7936 }, { "epoch": 7.68, "learning_rate": 5e-05, "loss": 0.1823, "step": 8064 }, { "epoch": 7.8, "learning_rate": 5e-05, "loss": 0.1867, "step": 8192 }, { "epoch": 7.92, "learning_rate": 5e-05, "loss": 0.1931, "step": 8320 }, { "epoch": 8.05, "learning_rate": 5e-05, "loss": 0.1635, "step": 8448 }, { "epoch": 8.17, "learning_rate": 5e-05, "loss": 0.1111, "step": 8576 }, { "epoch": 8.29, "learning_rate": 5e-05, "loss": 0.1158, "step": 8704 }, { "epoch": 8.41, "learning_rate": 5e-05, "loss": 0.1188, "step": 8832 }, { "epoch": 8.53, "learning_rate": 5e-05, "loss": 0.124, "step": 8960 }, { "epoch": 8.66, "learning_rate": 5e-05, "loss": 0.1295, "step": 9088 }, { "epoch": 8.78, "learning_rate": 5e-05, "loss": 0.1348, "step": 9216 }, { "epoch": 8.9, "learning_rate": 5e-05, "loss": 0.1386, "step": 9344 }, { "epoch": 9.02, "learning_rate": 5e-05, "loss": 0.1343, "step": 9472 }, { "epoch": 9.14, "learning_rate": 5e-05, "loss": 0.0847, "step": 9600 }, { "epoch": 9.26, "learning_rate": 5e-05, "loss": 0.0873, "step": 9728 }, { "epoch": 9.39, "learning_rate": 5e-05, "loss": 0.0915, "step": 9856 }, { "epoch": 9.51, "learning_rate": 5e-05, "loss": 0.0961, "step": 9984 }, { "epoch": 9.63, "learning_rate": 5e-05, "loss": 0.1003, "step": 10112 }, { "epoch": 9.75, "learning_rate": 5e-05, "loss": 0.1043, "step": 10240 }, { "epoch": 9.87, "learning_rate": 5e-05, "loss": 0.1094, "step": 10368 }, { "epoch": 10.0, "learning_rate": 5e-05, "loss": 0.1135, "step": 10496 }, { "epoch": 10.12, "learning_rate": 5e-05, "loss": 0.0723, "step": 10624 }, { "epoch": 10.24, "learning_rate": 5e-05, "loss": 0.0724, "step": 10752 }, { "epoch": 10.36, "learning_rate": 5e-05, "loss": 0.0759, "step": 10880 }, { "epoch": 10.48, "learning_rate": 5e-05, "loss": 0.0779, "step": 11008 }, { "epoch": 10.61, "learning_rate": 5e-05, "loss": 0.0817, "step": 11136 }, { "epoch": 10.73, "learning_rate": 5e-05, "loss": 0.0867, "step": 11264 }, { "epoch": 10.85, "learning_rate": 5e-05, "loss": 0.0908, "step": 11392 }, { "epoch": 10.97, "learning_rate": 5e-05, "loss": 0.0954, "step": 11520 }, { "epoch": 11.09, "learning_rate": 5e-05, "loss": 0.0691, "step": 11648 }, { "epoch": 11.22, "learning_rate": 5e-05, "loss": 0.0627, "step": 11776 }, { "epoch": 11.34, "learning_rate": 5e-05, "loss": 0.0649, "step": 11904 }, { "epoch": 11.46, "learning_rate": 5e-05, "loss": 0.0683, "step": 12032 }, { "epoch": 11.58, "learning_rate": 5e-05, "loss": 0.0721, "step": 12160 }, { "epoch": 11.7, "learning_rate": 5e-05, "loss": 0.0747, "step": 12288 }, { "epoch": 11.82, "learning_rate": 5e-05, "loss": 0.0768, "step": 12416 }, { "epoch": 11.95, "learning_rate": 5e-05, "loss": 0.0809, "step": 12544 }, { "epoch": 12.07, "learning_rate": 5e-05, "loss": 0.0681, "step": 12672 }, { "epoch": 12.19, "learning_rate": 5e-05, "loss": 0.0546, "step": 12800 }, { "epoch": 12.31, "learning_rate": 5e-05, "loss": 0.0561, "step": 12928 }, { "epoch": 12.43, "learning_rate": 5e-05, "loss": 0.059, "step": 13056 }, { "epoch": 12.56, "learning_rate": 5e-05, "loss": 0.0612, "step": 13184 }, { "epoch": 12.68, "learning_rate": 5e-05, "loss": 0.0644, "step": 13312 }, { "epoch": 12.8, "learning_rate": 5e-05, "loss": 0.0693, "step": 13440 }, { "epoch": 12.92, "learning_rate": 5e-05, "loss": 0.072, "step": 13568 }, { "epoch": 13.04, "learning_rate": 5e-05, "loss": 0.0663, "step": 13696 }, { "epoch": 13.17, "learning_rate": 5e-05, "loss": 0.0506, "step": 13824 }, { "epoch": 13.29, "learning_rate": 5e-05, "loss": 0.0512, "step": 13952 }, { "epoch": 13.41, "learning_rate": 5e-05, "loss": 0.0539, "step": 14080 }, { "epoch": 13.53, "learning_rate": 5e-05, "loss": 0.057, "step": 14208 }, { "epoch": 13.65, "learning_rate": 5e-05, "loss": 0.059, "step": 14336 }, { "epoch": 13.78, "learning_rate": 5e-05, "loss": 0.0614, "step": 14464 }, { "epoch": 13.9, "learning_rate": 5e-05, "loss": 0.0643, "step": 14592 }, { "epoch": 14.02, "learning_rate": 5e-05, "loss": 0.0645, "step": 14720 }, { "epoch": 14.14, "learning_rate": 5e-05, "loss": 0.0454, "step": 14848 }, { "epoch": 14.26, "learning_rate": 5e-05, "loss": 0.0465, "step": 14976 }, { "epoch": 14.38, "learning_rate": 5e-05, "loss": 0.0484, "step": 15104 }, { "epoch": 14.51, "learning_rate": 5e-05, "loss": 0.0508, "step": 15232 }, { "epoch": 14.63, "learning_rate": 5e-05, "loss": 0.053, "step": 15360 }, { "epoch": 14.75, "learning_rate": 5e-05, "loss": 0.0546, "step": 15488 }, { "epoch": 14.87, "learning_rate": 5e-05, "loss": 0.0566, "step": 15616 }, { "epoch": 14.99, "learning_rate": 5e-05, "loss": 0.0594, "step": 15744 }, { "epoch": 15.12, "learning_rate": 5e-05, "loss": 0.043, "step": 15872 }, { "epoch": 15.24, "learning_rate": 5e-05, "loss": 0.0433, "step": 16000 }, { "epoch": 15.36, "learning_rate": 5e-05, "loss": 0.0456, "step": 16128 }, { "epoch": 15.48, "learning_rate": 5e-05, "loss": 0.0474, "step": 16256 }, { "epoch": 15.6, "learning_rate": 5e-05, "loss": 0.049, "step": 16384 }, { "epoch": 15.73, "learning_rate": 5e-05, "loss": 0.0501, "step": 16512 }, { "epoch": 15.85, "learning_rate": 5e-05, "loss": 0.0522, "step": 16640 }, { "epoch": 15.97, "learning_rate": 5e-05, "loss": 0.0554, "step": 16768 } ], "max_steps": 21000, "num_train_epochs": 20, "total_flos": 3285446215892992.0, "trial_name": null, "trial_params": null }