| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.953846153846154, | |
| "eval_steps": 500, | |
| "global_step": 144, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 5.2885, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 4.3115, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.999744599547812e-05, | |
| "loss": 3.1889, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.99770217861636e-05, | |
| "loss": 2.2276, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 1.9936215093023884e-05, | |
| "loss": 1.4245, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.9875109281794828e-05, | |
| "loss": 0.9188, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.9793829188147406e-05, | |
| "loss": 4.5676, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.9692540862655587e-05, | |
| "loss": 3.4422, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9571451231564523e-05, | |
| "loss": 1.9053, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.9430807674052092e-05, | |
| "loss": 1.1145, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.9270897516847406e-05, | |
| "loss": 0.717, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.9092047447238775e-05, | |
| "loss": 0.5241, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 1.8894622845670282e-05, | |
| "loss": 3.1616, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.86790270392905e-05, | |
| "loss": 1.4529, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.8445700477978207e-05, | |
| "loss": 0.8347, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.8195119834528535e-05, | |
| "loss": 0.4146, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.792779703083777e-05, | |
| "loss": 0.2956, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.764427819207624e-05, | |
| "loss": 0.2205, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.734514253098589e-05, | |
| "loss": 1.6814, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.7031001164581828e-05, | |
| "loss": 0.4792, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.670249586567531e-05, | |
| "loss": 0.2038, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.636029775176862e-05, | |
| "loss": 0.1199, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.6005105914000508e-05, | |
| "loss": 0.0972, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.5637645988943008e-05, | |
| "loss": 0.0947, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 1.5258668676167548e-05, | |
| "loss": 0.6136, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.48689482046087e-05, | |
| "loss": 0.3785, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.4469280750858854e-05, | |
| "loss": 0.1075, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.4060482812625055e-05, | |
| "loss": 0.0497, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 1.3643389540670963e-05, | |
| "loss": 0.0431, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.3218853032651719e-05, | |
| "loss": 0.0432, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 1.2787740592327232e-05, | |
| "loss": 0.2216, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 1.2350932957710322e-05, | |
| "loss": 0.1697, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.1909322501769407e-05, | |
| "loss": 0.0716, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.1463811409361667e-05, | |
| "loss": 0.0357, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.1015309834121083e-05, | |
| "loss": 0.02, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.05647340390667e-05, | |
| "loss": 0.0326, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.01130045247298e-05, | |
| "loss": 0.1052, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 9.661044148624038e-06, | |
| "loss": 0.061, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 9.209776239900453e-06, | |
| "loss": 0.0387, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 8.76012271303888e-06, | |
| "loss": 0.0186, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.313002184429529e-06, | |
| "loss": 0.0249, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 7.869328095692313e-06, | |
| "loss": 0.0319, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 7.430006847567972e-06, | |
| "loss": 0.0586, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 6.995935948193294e-06, | |
| "loss": 0.0917, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 6.568002179543409e-06, | |
| "loss": 0.0284, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 6.147079785787038e-06, | |
| "loss": 0.0158, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 5.7340286872557515e-06, | |
| "loss": 0.0145, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 5.329692723675994e-06, | |
| "loss": 0.0189, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.934897930252887e-06, | |
| "loss": 0.03, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 4.550450850127626e-06, | |
| "loss": 0.0605, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 4.177136886656067e-06, | |
| "loss": 0.0348, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 3.815718698874672e-06, | |
| "loss": 0.0191, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 3.466934643431795e-06, | |
| "loss": 0.0174, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.1314972661673572e-06, | |
| "loss": 0.0167, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 2.8100918464225304e-06, | |
| "loss": 0.0295, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 2.5033749970533015e-06, | |
| "loss": 0.0499, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 2.211973323008041e-06, | |
| "loss": 0.0262, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.936482141209486e-06, | |
| "loss": 0.0162, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.6774642643563955e-06, | |
| "loss": 0.012, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.4354488511294418e-06, | |
| "loss": 0.0198, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.2109303251503434e-06, | |
| "loss": 0.0291, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.0043673649027519e-06, | |
| "loss": 0.0534, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 8.161819666783888e-07, | |
| "loss": 0.0224, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.467585824627886e-07, | |
| "loss": 0.0184, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 4.964433345219354e-07, | |
| "loss": 0.0157, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 3.6554330829429716e-07, | |
| "loss": 0.0177, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 2.5432592503288e-07, | |
| "loss": 0.0268, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 1.630183954789233e-07, | |
| "loss": 0.0562, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 9.180725568338045e-08, | |
| "loss": 0.0211, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 4.083798592444899e-08, | |
| "loss": 0.0202, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.0214713499706596e-08, | |
| "loss": 0.0142, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 0.0, | |
| "loss": 0.0116, | |
| "step": 144 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 144, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 9993412315865088.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |