{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.756756756756757,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 4.996861268047709e-05,
      "loss": 2.8343,
      "step": 10
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.9937225360954175e-05,
      "loss": 2.6082,
      "step": 20
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.9905838041431265e-05,
      "loss": 2.7027,
      "step": 30
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.987445072190835e-05,
      "loss": 2.5134,
      "step": 40
    },
    {
      "epoch": 0.68,
      "learning_rate": 4.984306340238544e-05,
      "loss": 2.413,
      "step": 50
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.981167608286253e-05,
      "loss": 2.4923,
      "step": 60
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.978028876333961e-05,
      "loss": 2.5383,
      "step": 70
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.97489014438167e-05,
      "loss": 2.1952,
      "step": 80
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.971751412429379e-05,
      "loss": 2.3365,
      "step": 90
    },
    {
      "epoch": 1.35,
      "learning_rate": 4.968612680477088e-05,
      "loss": 2.2645,
      "step": 100
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.965473948524796e-05,
      "loss": 2.2224,
      "step": 110
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.962335216572505e-05,
      "loss": 2.1858,
      "step": 120
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.959196484620214e-05,
      "loss": 2.01,
      "step": 130
    },
    {
      "epoch": 1.89,
      "learning_rate": 4.956057752667922e-05,
      "loss": 2.0707,
      "step": 140
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.952919020715631e-05,
      "loss": 2.1271,
      "step": 150
    },
    {
      "epoch": 2.16,
      "learning_rate": 4.9497802887633396e-05,
      "loss": 2.1912,
      "step": 160
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.9466415568110485e-05,
      "loss": 1.9693,
      "step": 170
    },
    {
      "epoch": 2.43,
      "learning_rate": 4.9435028248587575e-05,
      "loss": 1.7561,
      "step": 180
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.940364092906466e-05,
      "loss": 2.1115,
      "step": 190
    },
    {
      "epoch": 2.7,
      "learning_rate": 4.937225360954175e-05,
      "loss": 1.9374,
      "step": 200
    },
    {
      "epoch": 2.84,
      "learning_rate": 4.934086629001883e-05,
      "loss": 1.8973,
      "step": 210
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.930947897049592e-05,
      "loss": 1.9069,
      "step": 220
    },
    {
      "epoch": 3.11,
      "learning_rate": 4.927809165097301e-05,
      "loss": 1.8768,
      "step": 230
    },
    {
      "epoch": 3.24,
      "learning_rate": 4.924670433145009e-05,
      "loss": 1.7559,
      "step": 240
    },
    {
      "epoch": 3.38,
      "learning_rate": 4.921531701192718e-05,
      "loss": 1.6955,
      "step": 250
    },
    {
      "epoch": 3.51,
      "learning_rate": 4.918392969240427e-05,
      "loss": 1.4821,
      "step": 260
    },
    {
      "epoch": 3.65,
      "learning_rate": 4.915254237288136e-05,
      "loss": 1.7175,
      "step": 270
    },
    {
      "epoch": 3.78,
      "learning_rate": 4.9121155053358444e-05,
      "loss": 1.6685,
      "step": 280
    },
    {
      "epoch": 3.92,
      "learning_rate": 4.9089767733835534e-05,
      "loss": 1.6313,
      "step": 290
    },
    {
      "epoch": 4.05,
      "learning_rate": 4.9058380414312623e-05,
      "loss": 1.6231,
      "step": 300
    },
    {
      "epoch": 4.19,
      "learning_rate": 4.9026993094789706e-05,
      "loss": 1.4119,
      "step": 310
    },
    {
      "epoch": 4.32,
      "learning_rate": 4.8995605775266796e-05,
      "loss": 1.2547,
      "step": 320
    },
    {
      "epoch": 4.46,
      "learning_rate": 4.896421845574388e-05,
      "loss": 1.3748,
      "step": 330
    },
    {
      "epoch": 4.59,
      "learning_rate": 4.893283113622097e-05,
      "loss": 1.3336,
      "step": 340
    },
    {
      "epoch": 4.73,
      "learning_rate": 4.890144381669806e-05,
      "loss": 1.4334,
      "step": 350
    },
    {
      "epoch": 4.86,
      "learning_rate": 4.887005649717514e-05,
      "loss": 1.4155,
      "step": 360
    },
    {
      "epoch": 5.0,
      "learning_rate": 4.883866917765223e-05,
      "loss": 1.4832,
      "step": 370
    },
    {
      "epoch": 5.14,
      "learning_rate": 4.8807281858129313e-05,
      "loss": 0.9712,
      "step": 380
    },
    {
      "epoch": 5.27,
      "learning_rate": 4.87758945386064e-05,
      "loss": 1.1672,
      "step": 390
    },
    {
      "epoch": 5.41,
      "learning_rate": 4.874450721908349e-05,
      "loss": 1.338,
      "step": 400
    },
    {
      "epoch": 5.54,
      "learning_rate": 4.8713119899560576e-05,
      "loss": 1.0129,
      "step": 410
    },
    {
      "epoch": 5.68,
      "learning_rate": 4.8681732580037665e-05,
      "loss": 1.1344,
      "step": 420
    },
    {
      "epoch": 5.81,
      "learning_rate": 4.8650345260514755e-05,
      "loss": 1.139,
      "step": 430
    },
    {
      "epoch": 5.95,
      "learning_rate": 4.8618957940991844e-05,
      "loss": 1.1943,
      "step": 440
    },
    {
      "epoch": 6.08,
      "learning_rate": 4.8587570621468934e-05,
      "loss": 0.9281,
      "step": 450
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.855618330194602e-05,
      "loss": 0.9121,
      "step": 460
    },
    {
      "epoch": 6.35,
      "learning_rate": 4.8524795982423107e-05,
      "loss": 1.0089,
      "step": 470
    },
    {
      "epoch": 6.49,
      "learning_rate": 4.849340866290019e-05,
      "loss": 0.9739,
      "step": 480
    },
    {
      "epoch": 6.62,
      "learning_rate": 4.846202134337728e-05,
      "loss": 0.958,
      "step": 490
    },
    {
      "epoch": 6.76,
      "learning_rate": 4.843063402385437e-05,
      "loss": 0.7523,
      "step": 500
    }
  ],
  "logging_steps": 10,
  "max_steps": 15930,
  "num_train_epochs": 216,
  "save_steps": 500,
  "total_flos": 2.821509238259712e+16,
  "trial_name": null,
  "trial_params": null
}