{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.756756756756757,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 4.996861268047709e-05,
      "loss": 2.8545,
      "step": 10
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.9937225360954175e-05,
      "loss": 2.6973,
      "step": 20
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.9905838041431265e-05,
      "loss": 2.8918,
      "step": 30
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.987445072190835e-05,
      "loss": 2.7159,
      "step": 40
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.984306340238544e-05,
      "loss": 2.681,
      "step": 50
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.981167608286253e-05,
      "loss": 2.7932,
      "step": 60
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.978028876333961e-05,
      "loss": 2.9297,
      "step": 70
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.97489014438167e-05,
      "loss": 2.58,
      "step": 80
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.971751412429379e-05,
      "loss": 2.7518,
      "step": 90
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.968612680477088e-05,
      "loss": 2.8066,
      "step": 100
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.965473948524796e-05,
      "loss": 2.6671,
      "step": 110
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.962335216572505e-05,
      "loss": 2.7779,
      "step": 120
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.959196484620214e-05,
      "loss": 2.4978,
      "step": 130
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.956057752667922e-05,
      "loss": 2.5562,
      "step": 140
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.952919020715631e-05,
      "loss": 2.7411,
      "step": 150
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.9497802887633396e-05,
      "loss": 2.7523,
      "step": 160
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.9466415568110485e-05,
      "loss": 2.6865,
      "step": 170
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.9435028248587575e-05,
      "loss": 2.804,
      "step": 180
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.940364092906466e-05,
      "loss": 3.051,
      "step": 190
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.937225360954175e-05,
      "loss": 2.7102,
      "step": 200
    },
    {
      "epoch": 2.84,
      "learning_rate": 4.934086629001883e-05,
      "loss": 2.6128,
      "step": 210
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.930947897049592e-05,
      "loss": 2.632,
      "step": 220
    },
    {
      "epoch": 3.11,
      "learning_rate": 4.927809165097301e-05,
      "loss": 2.5651,
      "step": 230
    },
    {
      "epoch": 3.24,
      "learning_rate": 4.924670433145009e-05,
      "loss": 2.8894,
      "step": 240
    },
    {
      "epoch": 3.38,
      "learning_rate": 4.921531701192718e-05,
      "loss": 2.7573,
      "step": 250
    },
    {
      "epoch": 3.51,
      "learning_rate": 4.918392969240427e-05,
      "loss": 2.4314,
      "step": 260
    },
    {
      "epoch": 3.65,
      "learning_rate": 4.915254237288136e-05,
      "loss": 2.6477,
      "step": 270
    },
    {
      "epoch": 3.78,
      "learning_rate": 4.9121155053358444e-05,
      "loss": 2.5796,
      "step": 280
    },
    {
      "epoch": 3.92,
      "learning_rate": 4.9089767733835534e-05,
      "loss": 2.6638,
      "step": 290
    },
    {
      "epoch": 4.05,
      "learning_rate": 4.9058380414312623e-05,
      "loss": 2.7746,
      "step": 300
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.9026993094789706e-05,
      "loss": 2.8569,
      "step": 310
    },
    {
      "epoch": 4.32,
      "learning_rate": 4.8995605775266796e-05,
      "loss": 2.4447,
      "step": 320
    },
    {
      "epoch": 4.46,
      "learning_rate": 4.896421845574388e-05,
      "loss": 2.4985,
      "step": 330
    },
    {
      "epoch": 4.59,
      "learning_rate": 4.893283113622097e-05,
      "loss": 2.7694,
      "step": 340
    },
    {
      "epoch": 4.73,
      "learning_rate": 4.890144381669806e-05,
      "loss": 2.9069,
      "step": 350
    },
    {
      "epoch": 4.86,
      "learning_rate": 4.887005649717514e-05,
      "loss": 2.721,
      "step": 360
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.883866917765223e-05,
      "loss": 2.5664,
      "step": 370
    },
    {
      "epoch": 5.14,
      "learning_rate": 4.8807281858129313e-05,
      "loss": 2.7768,
      "step": 380
    },
    {
      "epoch": 5.27,
      "learning_rate": 4.87758945386064e-05,
      "loss": 2.5988,
      "step": 390
    },
    {
      "epoch": 5.41,
      "learning_rate": 4.874450721908349e-05,
      "loss": 2.6285,
      "step": 400
    },
    {
      "epoch": 5.54,
      "learning_rate": 4.8713119899560576e-05,
      "loss": 2.6223,
      "step": 410
    },
    {
      "epoch": 5.68,
      "learning_rate": 4.8681732580037665e-05,
      "loss": 2.6413,
      "step": 420
    },
    {
      "epoch": 5.81,
      "learning_rate": 4.8650345260514755e-05,
      "loss": 2.4766,
      "step": 430
    },
    {
      "epoch": 5.95,
      "learning_rate": 4.8618957940991844e-05,
      "loss": 2.6344,
      "step": 440
    },
    {
      "epoch": 6.08,
      "learning_rate": 4.8587570621468934e-05,
      "loss": 2.6272,
      "step": 450
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.855618330194602e-05,
      "loss": 2.6651,
      "step": 460
    },
    {
      "epoch": 6.35,
      "learning_rate": 4.8524795982423107e-05,
      "loss": 2.5442,
      "step": 470
    },
    {
      "epoch": 6.49,
      "learning_rate": 4.849340866290019e-05,
      "loss": 2.6415,
      "step": 480
    },
    {
      "epoch": 6.62,
      "learning_rate": 4.846202134337728e-05,
      "loss": 2.6057,
      "step": 490
    },
    {
      "epoch": 6.76,
      "learning_rate": 4.843063402385437e-05,
      "loss": 2.3711,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 15930,
  "num_train_epochs": 216,
  "save_steps": 500,
  "total_flos": 2.816032166019072e+16,
  "trial_name": null,
  "trial_params": null
}