{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.53257790368272, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 1.9974521146102535e-05, "loss": 1.0269, "step": 10 }, { "epoch": 0.23, "learning_rate": 1.989821441880933e-05, "loss": 0.9778, "step": 20 }, { "epoch": 0.34, "learning_rate": 1.9771468659711595e-05, "loss": 0.9404, "step": 30 }, { "epoch": 0.45, "learning_rate": 1.9594929736144978e-05, "loss": 0.8988, "step": 40 }, { "epoch": 0.57, "learning_rate": 1.936949724999762e-05, "loss": 0.8994, "step": 50 }, { "epoch": 0.68, "learning_rate": 1.9096319953545186e-05, "loss": 0.8785, "step": 60 }, { "epoch": 0.79, "learning_rate": 1.8776789895672557e-05, "loss": 0.8423, "step": 70 }, { "epoch": 0.91, "learning_rate": 1.8412535328311813e-05, "loss": 0.8139, "step": 80 }, { "epoch": 1.02, "learning_rate": 1.8005412409243604e-05, "loss": 0.8333, "step": 90 }, { "epoch": 1.13, "learning_rate": 1.7557495743542586e-05, "loss": 0.8056, "step": 100 }, { "epoch": 1.25, "learning_rate": 1.7071067811865477e-05, "loss": 0.8051, "step": 110 }, { "epoch": 1.36, "learning_rate": 1.6548607339452853e-05, "loss": 0.7923, "step": 120 }, { "epoch": 1.47, "learning_rate": 1.599277666511347e-05, "loss": 0.8327, "step": 130 }, { "epoch": 1.59, "learning_rate": 1.5406408174555978e-05, "loss": 0.7954, "step": 140 }, { "epoch": 1.7, "learning_rate": 1.479248986720057e-05, "loss": 0.797, "step": 150 }, { "epoch": 1.81, "learning_rate": 1.4154150130018867e-05, "loss": 0.7754, "step": 160 }, { "epoch": 1.93, "learning_rate": 1.3494641795990986e-05, "loss": 0.7847, "step": 170 }, { "epoch": 2.04, "learning_rate": 1.2817325568414299e-05, "loss": 0.7779, "step": 180 }, { "epoch": 2.15, "learning_rate": 1.2125652895529766e-05, "loss": 0.7569, "step": 190 }, { "epoch": 2.27, "learning_rate": 1.1423148382732854e-05, "loss": 0.7688, "step": 200 }, { "epoch": 2.38, "learning_rate": 1.0713391831992324e-05, "loss": 0.7734, "step": 210 }, { "epoch": 2.49, "learning_rate": 1e-05, "loss": 0.7668, "step": 220 }, { "epoch": 2.61, "learning_rate": 9.286608168007678e-06, "loss": 0.7951, "step": 230 }, { "epoch": 2.72, "learning_rate": 8.576851617267151e-06, "loss": 0.7616, "step": 240 }, { "epoch": 2.83, "learning_rate": 7.874347104470234e-06, "loss": 0.7619, "step": 250 }, { "epoch": 2.95, "learning_rate": 7.182674431585703e-06, "loss": 0.7701, "step": 260 }, { "epoch": 3.06, "learning_rate": 6.505358204009018e-06, "loss": 0.7985, "step": 270 }, { "epoch": 3.17, "learning_rate": 5.845849869981137e-06, "loss": 0.75, "step": 280 }, { "epoch": 3.29, "learning_rate": 5.207510132799436e-06, "loss": 0.7508, "step": 290 }, { "epoch": 3.4, "learning_rate": 4.593591825444028e-06, "loss": 0.7655, "step": 300 }, { "epoch": 3.51, "learning_rate": 4.007223334886531e-06, "loss": 0.7686, "step": 310 }, { "epoch": 3.63, "learning_rate": 3.4513926605471504e-06, "loss": 0.7551, "step": 320 }, { "epoch": 3.74, "learning_rate": 2.9289321881345257e-06, "loss": 0.7524, "step": 330 }, { "epoch": 3.85, "learning_rate": 2.4425042564574186e-06, "loss": 0.7722, "step": 340 }, { "epoch": 3.97, "learning_rate": 1.994587590756397e-06, "loss": 0.7471, "step": 350 }, { "epoch": 4.08, "learning_rate": 1.587464671688187e-06, "loss": 0.754, "step": 360 }, { "epoch": 4.19, "learning_rate": 1.2232101043274437e-06, "loss": 0.7549, "step": 370 }, { "epoch": 4.31, "learning_rate": 9.036800464548157e-07, "loss": 0.7678, "step": 380 }, { "epoch": 4.42, "learning_rate": 6.305027500023841e-07, "loss": 0.7584, "step": 390 }, { "epoch": 4.53, "learning_rate": 4.0507026385502747e-07, "loss": 0.7613, "step": 400 } ], "max_steps": 440, "num_train_epochs": 5, "total_flos": 3.127602400495534e+17, "trial_name": null, "trial_params": null }