{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 16.0,
  "global_step": 16800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "learning_rate": 5e-05,
      "loss": 1.7753,
      "step": 128
    },
    {
      "epoch": 0.24,
      "learning_rate": 5e-05,
      "loss": 1.4026,
      "step": 256
    },
    {
      "epoch": 0.37,
      "learning_rate": 5e-05,
      "loss": 1.3331,
      "step": 384
    },
    {
      "epoch": 0.49,
      "learning_rate": 5e-05,
      "loss": 1.3015,
      "step": 512
    },
    {
      "epoch": 0.61,
      "learning_rate": 5e-05,
      "loss": 1.2594,
      "step": 640
    },
    {
      "epoch": 0.73,
      "learning_rate": 5e-05,
      "loss": 1.2452,
      "step": 768
    },
    {
      "epoch": 0.85,
      "learning_rate": 5e-05,
      "loss": 1.2219,
      "step": 896
    },
    {
      "epoch": 0.98,
      "learning_rate": 5e-05,
      "loss": 1.2057,
      "step": 1024
    },
    {
      "epoch": 1.1,
      "learning_rate": 5e-05,
      "loss": 1.086,
      "step": 1152
    },
    {
      "epoch": 1.22,
      "learning_rate": 5e-05,
      "loss": 1.046,
      "step": 1280
    },
    {
      "epoch": 1.34,
      "learning_rate": 5e-05,
      "loss": 1.0642,
      "step": 1408
    },
    {
      "epoch": 1.46,
      "learning_rate": 5e-05,
      "loss": 1.0602,
      "step": 1536
    },
    {
      "epoch": 1.58,
      "learning_rate": 5e-05,
      "loss": 1.0552,
      "step": 1664
    },
    {
      "epoch": 1.71,
      "learning_rate": 5e-05,
      "loss": 1.0565,
      "step": 1792
    },
    {
      "epoch": 1.83,
      "learning_rate": 5e-05,
      "loss": 1.0602,
      "step": 1920
    },
    {
      "epoch": 1.95,
      "learning_rate": 5e-05,
      "loss": 1.0534,
      "step": 2048
    },
    {
      "epoch": 2.07,
      "learning_rate": 5e-05,
      "loss": 0.9373,
      "step": 2176
    },
    {
      "epoch": 2.19,
      "learning_rate": 5e-05,
      "loss": 0.8638,
      "step": 2304
    },
    {
      "epoch": 2.32,
      "learning_rate": 5e-05,
      "loss": 0.8759,
      "step": 2432
    },
    {
      "epoch": 2.44,
      "learning_rate": 5e-05,
      "loss": 0.8775,
      "step": 2560
    },
    {
      "epoch": 2.56,
      "learning_rate": 5e-05,
      "loss": 0.8906,
      "step": 2688
    },
    {
      "epoch": 2.68,
      "learning_rate": 5e-05,
      "loss": 0.8947,
      "step": 2816
    },
    {
      "epoch": 2.8,
      "learning_rate": 5e-05,
      "loss": 0.8846,
      "step": 2944
    },
    {
      "epoch": 2.93,
      "learning_rate": 5e-05,
      "loss": 0.8891,
      "step": 3072
    },
    {
      "epoch": 3.05,
      "learning_rate": 5e-05,
      "loss": 0.8039,
      "step": 3200
    },
    {
      "epoch": 3.17,
      "learning_rate": 5e-05,
      "loss": 0.6736,
      "step": 3328
    },
    {
      "epoch": 3.29,
      "learning_rate": 5e-05,
      "loss": 0.6814,
      "step": 3456
    },
    {
      "epoch": 3.41,
      "learning_rate": 5e-05,
      "loss": 0.7016,
      "step": 3584
    },
    {
      "epoch": 3.54,
      "learning_rate": 5e-05,
      "loss": 0.7039,
      "step": 3712
    },
    {
      "epoch": 3.66,
      "learning_rate": 5e-05,
      "loss": 0.708,
      "step": 3840
    },
    {
      "epoch": 3.78,
      "learning_rate": 5e-05,
      "loss": 0.7175,
      "step": 3968
    },
    {
      "epoch": 3.9,
      "learning_rate": 5e-05,
      "loss": 0.7214,
      "step": 4096
    },
    {
      "epoch": 4.02,
      "learning_rate": 5e-05,
      "loss": 0.6764,
      "step": 4224
    },
    {
      "epoch": 4.14,
      "learning_rate": 5e-05,
      "loss": 0.4891,
      "step": 4352
    },
    {
      "epoch": 4.27,
      "learning_rate": 5e-05,
      "loss": 0.5006,
      "step": 4480
    },
    {
      "epoch": 4.39,
      "learning_rate": 5e-05,
      "loss": 0.5129,
      "step": 4608
    },
    {
      "epoch": 4.51,
      "learning_rate": 5e-05,
      "loss": 0.5221,
      "step": 4736
    },
    {
      "epoch": 4.63,
      "learning_rate": 5e-05,
      "loss": 0.5278,
      "step": 4864
    },
    {
      "epoch": 4.75,
      "learning_rate": 5e-05,
      "loss": 0.5413,
      "step": 4992
    },
    {
      "epoch": 4.88,
      "learning_rate": 5e-05,
      "loss": 0.5393,
      "step": 5120
    },
    {
      "epoch": 5.0,
      "learning_rate": 5e-05,
      "loss": 0.5398,
      "step": 5248
    },
    {
      "epoch": 5.12,
      "learning_rate": 5e-05,
      "loss": 0.3369,
      "step": 5376
    },
    {
      "epoch": 5.24,
      "learning_rate": 5e-05,
      "loss": 0.3417,
      "step": 5504
    },
    {
      "epoch": 5.36,
      "learning_rate": 5e-05,
      "loss": 0.3502,
      "step": 5632
    },
    {
      "epoch": 5.49,
      "learning_rate": 5e-05,
      "loss": 0.3593,
      "step": 5760
    },
    {
      "epoch": 5.61,
      "learning_rate": 5e-05,
      "loss": 0.3695,
      "step": 5888
    },
    {
      "epoch": 5.73,
      "learning_rate": 5e-05,
      "loss": 0.3764,
      "step": 6016
    },
    {
      "epoch": 5.85,
      "learning_rate": 5e-05,
      "loss": 0.3831,
      "step": 6144
    },
    {
      "epoch": 5.97,
      "learning_rate": 5e-05,
      "loss": 0.3891,
      "step": 6272
    },
    {
      "epoch": 6.1,
      "learning_rate": 5e-05,
      "loss": 0.2571,
      "step": 6400
    },
    {
      "epoch": 6.22,
      "learning_rate": 5e-05,
      "loss": 0.2232,
      "step": 6528
    },
    {
      "epoch": 6.34,
      "learning_rate": 5e-05,
      "loss": 0.2324,
      "step": 6656
    },
    {
      "epoch": 6.46,
      "learning_rate": 5e-05,
      "loss": 0.2421,
      "step": 6784
    },
    {
      "epoch": 6.58,
      "learning_rate": 5e-05,
      "loss": 0.2479,
      "step": 6912
    },
    {
      "epoch": 6.7,
      "learning_rate": 5e-05,
      "loss": 0.2584,
      "step": 7040
    },
    {
      "epoch": 6.83,
      "learning_rate": 5e-05,
      "loss": 0.2642,
      "step": 7168
    },
    {
      "epoch": 6.95,
      "learning_rate": 5e-05,
      "loss": 0.2706,
      "step": 7296
    },
    {
      "epoch": 7.07,
      "learning_rate": 5e-05,
      "loss": 0.203,
      "step": 7424
    },
    {
      "epoch": 7.19,
      "learning_rate": 5e-05,
      "loss": 0.1488,
      "step": 7552
    },
    {
      "epoch": 7.31,
      "learning_rate": 5e-05,
      "loss": 0.1654,
      "step": 7680
    },
    {
      "epoch": 7.44,
      "learning_rate": 5e-05,
      "loss": 0.1706,
      "step": 7808
    },
    {
      "epoch": 7.56,
      "learning_rate": 5e-05,
      "loss": 0.1799,
      "step": 7936
    },
    {
      "epoch": 7.68,
      "learning_rate": 5e-05,
      "loss": 0.1823,
      "step": 8064
    },
    {
      "epoch": 7.8,
      "learning_rate": 5e-05,
      "loss": 0.1867,
      "step": 8192
    },
    {
      "epoch": 7.92,
      "learning_rate": 5e-05,
      "loss": 0.1931,
      "step": 8320
    },
    {
      "epoch": 8.05,
      "learning_rate": 5e-05,
      "loss": 0.1635,
      "step": 8448
    },
    {
      "epoch": 8.17,
      "learning_rate": 5e-05,
      "loss": 0.1111,
      "step": 8576
    },
    {
      "epoch": 8.29,
      "learning_rate": 5e-05,
      "loss": 0.1158,
      "step": 8704
    },
    {
      "epoch": 8.41,
      "learning_rate": 5e-05,
      "loss": 0.1188,
      "step": 8832
    },
    {
      "epoch": 8.53,
      "learning_rate": 5e-05,
      "loss": 0.124,
      "step": 8960
    },
    {
      "epoch": 8.66,
      "learning_rate": 5e-05,
      "loss": 0.1295,
      "step": 9088
    },
    {
      "epoch": 8.78,
      "learning_rate": 5e-05,
      "loss": 0.1348,
      "step": 9216
    },
    {
      "epoch": 8.9,
      "learning_rate": 5e-05,
      "loss": 0.1386,
      "step": 9344
    },
    {
      "epoch": 9.02,
      "learning_rate": 5e-05,
      "loss": 0.1343,
      "step": 9472
    },
    {
      "epoch": 9.14,
      "learning_rate": 5e-05,
      "loss": 0.0847,
      "step": 9600
    },
    {
      "epoch": 9.26,
      "learning_rate": 5e-05,
      "loss": 0.0873,
      "step": 9728
    },
    {
      "epoch": 9.39,
      "learning_rate": 5e-05,
      "loss": 0.0915,
      "step": 9856
    },
    {
      "epoch": 9.51,
      "learning_rate": 5e-05,
      "loss": 0.0961,
      "step": 9984
    },
    {
      "epoch": 9.63,
      "learning_rate": 5e-05,
      "loss": 0.1003,
      "step": 10112
    },
    {
      "epoch": 9.75,
      "learning_rate": 5e-05,
      "loss": 0.1043,
      "step": 10240
    },
    {
      "epoch": 9.87,
      "learning_rate": 5e-05,
      "loss": 0.1094,
      "step": 10368
    },
    {
      "epoch": 10.0,
      "learning_rate": 5e-05,
      "loss": 0.1135,
      "step": 10496
    },
    {
      "epoch": 10.12,
      "learning_rate": 5e-05,
      "loss": 0.0723,
      "step": 10624
    },
    {
      "epoch": 10.24,
      "learning_rate": 5e-05,
      "loss": 0.0724,
      "step": 10752
    },
    {
      "epoch": 10.36,
      "learning_rate": 5e-05,
      "loss": 0.0759,
      "step": 10880
    },
    {
      "epoch": 10.48,
      "learning_rate": 5e-05,
      "loss": 0.0779,
      "step": 11008
    },
    {
      "epoch": 10.61,
      "learning_rate": 5e-05,
      "loss": 0.0817,
      "step": 11136
    },
    {
      "epoch": 10.73,
      "learning_rate": 5e-05,
      "loss": 0.0867,
      "step": 11264
    },
    {
      "epoch": 10.85,
      "learning_rate": 5e-05,
      "loss": 0.0908,
      "step": 11392
    },
    {
      "epoch": 10.97,
      "learning_rate": 5e-05,
      "loss": 0.0954,
      "step": 11520
    },
    {
      "epoch": 11.09,
      "learning_rate": 5e-05,
      "loss": 0.0691,
      "step": 11648
    },
    {
      "epoch": 11.22,
      "learning_rate": 5e-05,
      "loss": 0.0627,
      "step": 11776
    },
    {
      "epoch": 11.34,
      "learning_rate": 5e-05,
      "loss": 0.0649,
      "step": 11904
    },
    {
      "epoch": 11.46,
      "learning_rate": 5e-05,
      "loss": 0.0683,
      "step": 12032
    },
    {
      "epoch": 11.58,
      "learning_rate": 5e-05,
      "loss": 0.0721,
      "step": 12160
    },
    {
      "epoch": 11.7,
      "learning_rate": 5e-05,
      "loss": 0.0747,
      "step": 12288
    },
    {
      "epoch": 11.82,
      "learning_rate": 5e-05,
      "loss": 0.0768,
      "step": 12416
    },
    {
      "epoch": 11.95,
      "learning_rate": 5e-05,
      "loss": 0.0809,
      "step": 12544
    },
    {
      "epoch": 12.07,
      "learning_rate": 5e-05,
      "loss": 0.0681,
      "step": 12672
    },
    {
      "epoch": 12.19,
      "learning_rate": 5e-05,
      "loss": 0.0546,
      "step": 12800
    },
    {
      "epoch": 12.31,
      "learning_rate": 5e-05,
      "loss": 0.0561,
      "step": 12928
    },
    {
      "epoch": 12.43,
      "learning_rate": 5e-05,
      "loss": 0.059,
      "step": 13056
    },
    {
      "epoch": 12.56,
      "learning_rate": 5e-05,
      "loss": 0.0612,
      "step": 13184
    },
    {
      "epoch": 12.68,
      "learning_rate": 5e-05,
      "loss": 0.0644,
      "step": 13312
    },
    {
      "epoch": 12.8,
      "learning_rate": 5e-05,
      "loss": 0.0693,
      "step": 13440
    },
    {
      "epoch": 12.92,
      "learning_rate": 5e-05,
      "loss": 0.072,
      "step": 13568
    },
    {
      "epoch": 13.04,
      "learning_rate": 5e-05,
      "loss": 0.0663,
      "step": 13696
    },
    {
      "epoch": 13.17,
      "learning_rate": 5e-05,
      "loss": 0.0506,
      "step": 13824
    },
    {
      "epoch": 13.29,
      "learning_rate": 5e-05,
      "loss": 0.0512,
      "step": 13952
    },
    {
      "epoch": 13.41,
      "learning_rate": 5e-05,
      "loss": 0.0539,
      "step": 14080
    },
    {
      "epoch": 13.53,
      "learning_rate": 5e-05,
      "loss": 0.057,
      "step": 14208
    },
    {
      "epoch": 13.65,
      "learning_rate": 5e-05,
      "loss": 0.059,
      "step": 14336
    },
    {
      "epoch": 13.78,
      "learning_rate": 5e-05,
      "loss": 0.0614,
      "step": 14464
    },
    {
      "epoch": 13.9,
      "learning_rate": 5e-05,
      "loss": 0.0643,
      "step": 14592
    },
    {
      "epoch": 14.02,
      "learning_rate": 5e-05,
      "loss": 0.0645,
      "step": 14720
    },
    {
      "epoch": 14.14,
      "learning_rate": 5e-05,
      "loss": 0.0454,
      "step": 14848
    },
    {
      "epoch": 14.26,
      "learning_rate": 5e-05,
      "loss": 0.0465,
      "step": 14976
    },
    {
      "epoch": 14.38,
      "learning_rate": 5e-05,
      "loss": 0.0484,
      "step": 15104
    },
    {
      "epoch": 14.51,
      "learning_rate": 5e-05,
      "loss": 0.0508,
      "step": 15232
    },
    {
      "epoch": 14.63,
      "learning_rate": 5e-05,
      "loss": 0.053,
      "step": 15360
    },
    {
      "epoch": 14.75,
      "learning_rate": 5e-05,
      "loss": 0.0546,
      "step": 15488
    },
    {
      "epoch": 14.87,
      "learning_rate": 5e-05,
      "loss": 0.0566,
      "step": 15616
    },
    {
      "epoch": 14.99,
      "learning_rate": 5e-05,
      "loss": 0.0594,
      "step": 15744
    },
    {
      "epoch": 15.12,
      "learning_rate": 5e-05,
      "loss": 0.043,
      "step": 15872
    },
    {
      "epoch": 15.24,
      "learning_rate": 5e-05,
      "loss": 0.0433,
      "step": 16000
    },
    {
      "epoch": 15.36,
      "learning_rate": 5e-05,
      "loss": 0.0456,
      "step": 16128
    },
    {
      "epoch": 15.48,
      "learning_rate": 5e-05,
      "loss": 0.0474,
      "step": 16256
    },
    {
      "epoch": 15.6,
      "learning_rate": 5e-05,
      "loss": 0.049,
      "step": 16384
    },
    {
      "epoch": 15.73,
      "learning_rate": 5e-05,
      "loss": 0.0501,
      "step": 16512
    },
    {
      "epoch": 15.85,
      "learning_rate": 5e-05,
      "loss": 0.0522,
      "step": 16640
    },
    {
      "epoch": 15.97,
      "learning_rate": 5e-05,
      "loss": 0.0554,
      "step": 16768
    }
  ],
  "max_steps": 21000,
  "num_train_epochs": 20,
  "total_flos": 3285446215892992.0,
  "trial_name": null,
  "trial_params": null
}