{ "best_metric": null, "best_model_checkpoint": null, "epoch": 35.08771929824562, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35, "learning_rate": 0.0002, "loss": 2.0495, "step": 10 }, { "epoch": 0.7, "learning_rate": 0.0002, "loss": 1.9419, "step": 20 }, { "epoch": 1.05, "learning_rate": 0.0002, "loss": 1.6328, "step": 30 }, { "epoch": 1.4, "learning_rate": 0.0002, "loss": 1.2627, "step": 40 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.1166, "step": 50 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 0.9466, "step": 60 }, { "epoch": 2.46, "learning_rate": 0.0002, "loss": 0.4556, "step": 70 }, { "epoch": 2.81, "learning_rate": 0.0002, "loss": 0.4507, "step": 80 }, { "epoch": 3.16, "learning_rate": 0.0002, "loss": 0.3553, "step": 90 }, { "epoch": 3.51, "learning_rate": 0.0002, "loss": 0.1801, "step": 100 }, { "epoch": 3.86, "learning_rate": 0.0002, "loss": 0.2521, "step": 110 }, { "epoch": 4.21, "learning_rate": 0.0002, "loss": 0.1021, "step": 120 }, { "epoch": 4.56, "learning_rate": 0.0002, "loss": 0.0998, "step": 130 }, { "epoch": 4.91, "learning_rate": 0.0002, "loss": 0.0726, "step": 140 }, { "epoch": 5.26, "learning_rate": 0.0002, "loss": 0.0708, "step": 150 }, { "epoch": 5.61, "learning_rate": 0.0002, "loss": 0.0327, "step": 160 }, { "epoch": 5.96, "learning_rate": 0.0002, "loss": 0.0662, "step": 170 }, { "epoch": 6.32, "learning_rate": 0.0002, "loss": 0.0258, "step": 180 }, { "epoch": 6.67, "learning_rate": 0.0002, "loss": 0.0301, "step": 190 }, { "epoch": 7.02, "learning_rate": 0.0002, "loss": 0.0412, "step": 200 }, { "epoch": 7.37, "learning_rate": 0.0002, "loss": 0.0396, "step": 210 }, { "epoch": 7.72, "learning_rate": 0.0002, "loss": 0.0155, "step": 220 }, { "epoch": 8.07, "learning_rate": 0.0002, "loss": 0.0295, "step": 230 }, { "epoch": 8.42, "learning_rate": 0.0002, "loss": 0.01, "step": 240 }, { "epoch": 8.77, "learning_rate": 0.0002, "loss": 0.0247, "step": 250 }, { "epoch": 9.12, "learning_rate": 0.0002, "loss": 0.0183, "step": 260 }, { "epoch": 9.47, "learning_rate": 0.0002, "loss": 0.0018, "step": 270 }, { "epoch": 9.82, "learning_rate": 0.0002, "loss": 0.0053, "step": 280 }, { "epoch": 10.18, "learning_rate": 0.0002, "loss": 0.0161, "step": 290 }, { "epoch": 10.53, "learning_rate": 0.0002, "loss": 0.0122, "step": 300 }, { "epoch": 10.88, "learning_rate": 0.0002, "loss": 0.0126, "step": 310 }, { "epoch": 11.23, "learning_rate": 0.0002, "loss": 0.0135, "step": 320 }, { "epoch": 11.58, "learning_rate": 0.0002, "loss": 0.0196, "step": 330 }, { "epoch": 11.93, "learning_rate": 0.0002, "loss": 0.014, "step": 340 }, { "epoch": 12.28, "learning_rate": 0.0002, "loss": 0.0056, "step": 350 }, { "epoch": 12.63, "learning_rate": 0.0002, "loss": 0.0078, "step": 360 }, { "epoch": 12.98, "learning_rate": 0.0002, "loss": 0.0151, "step": 370 }, { "epoch": 13.33, "learning_rate": 0.0002, "loss": 0.0155, "step": 380 }, { "epoch": 13.68, "learning_rate": 0.0002, "loss": 0.0019, "step": 390 }, { "epoch": 14.04, "learning_rate": 0.0002, "loss": 0.0396, "step": 400 }, { "epoch": 14.39, "learning_rate": 0.0002, "loss": 0.0262, "step": 410 }, { "epoch": 14.74, "learning_rate": 0.0002, "loss": 0.0209, "step": 420 }, { "epoch": 15.09, "learning_rate": 0.0002, "loss": 0.0094, "step": 430 }, { "epoch": 15.44, "learning_rate": 0.0002, "loss": 0.0126, "step": 440 }, { "epoch": 15.79, "learning_rate": 0.0002, "loss": 0.0118, "step": 450 }, { "epoch": 16.14, "learning_rate": 0.0002, "loss": 0.0049, "step": 460 }, { "epoch": 16.49, "learning_rate": 0.0002, "loss": 0.0018, "step": 470 }, { "epoch": 16.84, "learning_rate": 0.0002, "loss": 0.0097, "step": 480 }, { "epoch": 17.19, "learning_rate": 0.0002, "loss": 0.0115, "step": 490 }, { "epoch": 17.54, "learning_rate": 0.0002, "loss": 0.0033, "step": 500 }, { "epoch": 17.89, "learning_rate": 0.0002, "loss": 0.0122, "step": 510 }, { "epoch": 18.25, "learning_rate": 0.0002, "loss": 0.0083, "step": 520 }, { "epoch": 18.6, "learning_rate": 0.0002, "loss": 0.002, "step": 530 }, { "epoch": 18.95, "learning_rate": 0.0002, "loss": 0.0136, "step": 540 }, { "epoch": 19.3, "learning_rate": 0.0002, "loss": 0.0127, "step": 550 }, { "epoch": 19.65, "learning_rate": 0.0002, "loss": 0.004, "step": 560 }, { "epoch": 20.0, "learning_rate": 0.0002, "loss": 0.0082, "step": 570 }, { "epoch": 20.35, "learning_rate": 0.0002, "loss": 0.0147, "step": 580 }, { "epoch": 20.7, "learning_rate": 0.0002, "loss": 0.0206, "step": 590 }, { "epoch": 21.05, "learning_rate": 0.0002, "loss": 0.0238, "step": 600 }, { "epoch": 21.4, "learning_rate": 0.0002, "loss": 0.0286, "step": 610 }, { "epoch": 21.75, "learning_rate": 0.0002, "loss": 0.0484, "step": 620 }, { "epoch": 22.11, "learning_rate": 0.0002, "loss": 0.0251, "step": 630 }, { "epoch": 22.46, "learning_rate": 0.0002, "loss": 0.0324, "step": 640 }, { "epoch": 22.81, "learning_rate": 0.0002, "loss": 0.024, "step": 650 }, { "epoch": 23.16, "learning_rate": 0.0002, "loss": 0.0166, "step": 660 }, { "epoch": 23.51, "learning_rate": 0.0002, "loss": 0.0298, "step": 670 }, { "epoch": 23.86, "learning_rate": 0.0002, "loss": 0.0383, "step": 680 }, { "epoch": 24.21, "learning_rate": 0.0002, "loss": 0.0309, "step": 690 }, { "epoch": 24.56, "learning_rate": 0.0002, "loss": 0.0164, "step": 700 }, { "epoch": 24.91, "learning_rate": 0.0002, "loss": 0.0281, "step": 710 }, { "epoch": 25.26, "learning_rate": 0.0002, "loss": 0.0146, "step": 720 }, { "epoch": 25.61, "learning_rate": 0.0002, "loss": 0.0433, "step": 730 }, { "epoch": 25.96, "learning_rate": 0.0002, "loss": 0.0543, "step": 740 }, { "epoch": 26.32, "learning_rate": 0.0002, "loss": 0.0786, "step": 750 }, { "epoch": 26.67, "learning_rate": 0.0002, "loss": 0.0248, "step": 760 }, { "epoch": 27.02, "learning_rate": 0.0002, "loss": 0.0249, "step": 770 }, { "epoch": 27.37, "learning_rate": 0.0002, "loss": 0.0253, "step": 780 }, { "epoch": 27.72, "learning_rate": 0.0002, "loss": 0.0111, "step": 790 }, { "epoch": 28.07, "learning_rate": 0.0002, "loss": 0.0356, "step": 800 }, { "epoch": 28.42, "learning_rate": 0.0002, "loss": 0.0597, "step": 810 }, { "epoch": 28.77, "learning_rate": 0.0002, "loss": 0.0473, "step": 820 }, { "epoch": 29.12, "learning_rate": 0.0002, "loss": 0.0233, "step": 830 }, { "epoch": 29.47, "learning_rate": 0.0002, "loss": 0.0085, "step": 840 }, { "epoch": 29.82, "learning_rate": 0.0002, "loss": 0.0114, "step": 850 }, { "epoch": 30.18, "learning_rate": 0.0002, "loss": 0.035, "step": 860 }, { "epoch": 30.53, "learning_rate": 0.0002, "loss": 0.017, "step": 870 }, { "epoch": 30.88, "learning_rate": 0.0002, "loss": 0.0398, "step": 880 }, { "epoch": 31.23, "learning_rate": 0.0002, "loss": 0.0101, "step": 890 }, { "epoch": 31.58, "learning_rate": 0.0002, "loss": 0.0163, "step": 900 }, { "epoch": 31.93, "learning_rate": 0.0002, "loss": 0.0425, "step": 910 }, { "epoch": 32.28, "learning_rate": 0.0002, "loss": 0.0319, "step": 920 }, { "epoch": 32.63, "learning_rate": 0.0002, "loss": 0.0163, "step": 930 }, { "epoch": 32.98, "learning_rate": 0.0002, "loss": 0.0236, "step": 940 }, { "epoch": 33.33, "learning_rate": 0.0002, "loss": 0.0255, "step": 950 }, { "epoch": 33.68, "learning_rate": 0.0002, "loss": 0.0261, "step": 960 }, { "epoch": 34.04, "learning_rate": 0.0002, "loss": 0.0197, "step": 970 }, { "epoch": 34.39, "learning_rate": 0.0002, "loss": 0.0323, "step": 980 }, { "epoch": 34.74, "learning_rate": 0.0002, "loss": 0.0456, "step": 990 }, { "epoch": 35.09, "learning_rate": 0.0002, "loss": 0.0149, "step": 1000 } ], "max_steps": 1000, "num_train_epochs": 36, "total_flos": 8864472152586240.0, "trial_name": null, "trial_params": null }