| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 135.59322033898306, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.0196, | |
| "loss": 4.115, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 0.0192, | |
| "loss": 4.0422, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 0.0188, | |
| "loss": 3.7797, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 10.85, | |
| "learning_rate": 0.0184, | |
| "loss": 3.204, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 13.56, | |
| "learning_rate": 0.018000000000000002, | |
| "loss": 2.7285, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 16.27, | |
| "learning_rate": 0.0176, | |
| "loss": 2.1524, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 18.98, | |
| "learning_rate": 0.0172, | |
| "loss": 1.6875, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 21.69, | |
| "learning_rate": 0.0168, | |
| "loss": 1.2613, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 24.41, | |
| "learning_rate": 0.016399999999999998, | |
| "loss": 0.9464, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 27.12, | |
| "learning_rate": 0.016, | |
| "loss": 0.734, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 29.83, | |
| "learning_rate": 0.015600000000000001, | |
| "loss": 0.5502, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 32.54, | |
| "learning_rate": 0.0152, | |
| "loss": 0.4353, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 35.25, | |
| "learning_rate": 0.0148, | |
| "loss": 0.3286, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 37.97, | |
| "learning_rate": 0.0144, | |
| "loss": 0.2814, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 40.68, | |
| "learning_rate": 0.013999999999999999, | |
| "loss": 0.2337, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 43.39, | |
| "learning_rate": 0.013600000000000001, | |
| "loss": 0.1949, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 46.1, | |
| "learning_rate": 0.013200000000000002, | |
| "loss": 0.1482, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 48.81, | |
| "learning_rate": 0.0128, | |
| "loss": 0.136, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 51.53, | |
| "learning_rate": 0.0124, | |
| "loss": 0.1175, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 54.24, | |
| "learning_rate": 0.012, | |
| "loss": 0.0995, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 56.95, | |
| "learning_rate": 0.0116, | |
| "loss": 0.0841, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 59.66, | |
| "learning_rate": 0.011200000000000002, | |
| "loss": 0.07, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 62.37, | |
| "learning_rate": 0.0108, | |
| "loss": 0.0672, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 65.08, | |
| "learning_rate": 0.010400000000000001, | |
| "loss": 0.058, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 67.8, | |
| "learning_rate": 0.01, | |
| "loss": 0.0546, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 70.51, | |
| "learning_rate": 0.0096, | |
| "loss": 0.0503, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 73.22, | |
| "learning_rate": 0.0092, | |
| "loss": 0.0471, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 75.93, | |
| "learning_rate": 0.0088, | |
| "loss": 0.0422, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 78.64, | |
| "learning_rate": 0.0084, | |
| "loss": 0.0352, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 81.36, | |
| "learning_rate": 0.008, | |
| "loss": 0.0374, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 84.07, | |
| "learning_rate": 0.0076, | |
| "loss": 0.033, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 86.78, | |
| "learning_rate": 0.0072, | |
| "loss": 0.0332, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 89.49, | |
| "learning_rate": 0.0068000000000000005, | |
| "loss": 0.0292, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 92.2, | |
| "learning_rate": 0.0064, | |
| "loss": 0.0298, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 94.92, | |
| "learning_rate": 0.006, | |
| "loss": 0.0258, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 97.63, | |
| "learning_rate": 0.005600000000000001, | |
| "loss": 0.0263, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 100.34, | |
| "learning_rate": 0.005200000000000001, | |
| "loss": 0.0252, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 103.05, | |
| "learning_rate": 0.0048, | |
| "loss": 0.0249, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 105.76, | |
| "learning_rate": 0.0044, | |
| "loss": 0.0225, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 108.47, | |
| "learning_rate": 0.004, | |
| "loss": 0.0219, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 111.19, | |
| "learning_rate": 0.0036, | |
| "loss": 0.0224, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 113.9, | |
| "learning_rate": 0.0032, | |
| "loss": 0.0238, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 116.61, | |
| "learning_rate": 0.0028000000000000004, | |
| "loss": 0.0196, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 119.32, | |
| "learning_rate": 0.0024, | |
| "loss": 0.0207, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 122.03, | |
| "learning_rate": 0.002, | |
| "loss": 0.0208, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 124.75, | |
| "learning_rate": 0.0016, | |
| "loss": 0.0204, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 127.46, | |
| "learning_rate": 0.0012, | |
| "loss": 0.0207, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 130.17, | |
| "learning_rate": 0.0008, | |
| "loss": 0.0206, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 132.88, | |
| "learning_rate": 0.0004, | |
| "loss": 0.0203, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 135.59, | |
| "learning_rate": 0.0, | |
| "loss": 0.021, | |
| "step": 500 | |
| } | |
| ], | |
| "max_steps": 500, | |
| "num_train_epochs": 167, | |
| "total_flos": 6.9331442466816e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |