| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 230, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.1731, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.1107, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.1074, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.1454, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.1185, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.2155, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 2e-05, | |
| "loss": 0.1626, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 1.9999007677495127e-05, | |
| "loss": 0.1486, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.9996030906921302e-05, | |
| "loss": 0.1611, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.9991070279061808e-05, | |
| "loss": 0.1354, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.9984126778425178e-05, | |
| "loss": 0.1309, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.9975201783049804e-05, | |
| "loss": 0.1228, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.9964297064230437e-05, | |
| "loss": 0.1096, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.9951414786166656e-05, | |
| "loss": 0.1152, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.9936557505533346e-05, | |
| "loss": 0.1058, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.9919728170973297e-05, | |
| "loss": 0.0853, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.9900930122511993e-05, | |
| "loss": 0.1111, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.988016709089474e-05, | |
| "loss": 1.8086, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.985744319684625e-05, | |
| "loss": 0.6606, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.9832762950252813e-05, | |
| "loss": 0.2228, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.9806131249267256e-05, | |
| "loss": 0.1629, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.977755337933682e-05, | |
| "loss": 0.1505, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.9747035012154203e-05, | |
| "loss": 0.1273, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 1.971458220453192e-05, | |
| "loss": 0.159, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.968020139720024e-05, | |
| "loss": 0.1383, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.9643899413528926e-05, | |
| "loss": 0.1456, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 1.960568345817306e-05, | |
| "loss": 0.1609, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.9565561115643153e-05, | |
| "loss": 0.1258, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 1.9523540348799887e-05, | |
| "loss": 0.1219, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.9479629497273783e-05, | |
| "loss": 0.122, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.9433837275810084e-05, | |
| "loss": 0.1164, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.9386172772539162e-05, | |
| "loss": 0.1234, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.933664544717288e-05, | |
| "loss": 0.1189, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.928526512912715e-05, | |
| "loss": 0.1278, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.9232042015571152e-05, | |
| "loss": 0.1117, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.9176986669403556e-05, | |
| "loss": 0.1157, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9120110017156172e-05, | |
| "loss": 0.1032, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.9061423346825395e-05, | |
| "loss": 0.1035, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.9000938305631975e-05, | |
| "loss": 0.1029, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.8938666897709427e-05, | |
| "loss": 0.1058, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 1.8874621481721645e-05, | |
| "loss": 0.1039, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.8808814768410157e-05, | |
| "loss": 0.114, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.874125981807148e-05, | |
| "loss": 0.1076, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.867197003796512e-05, | |
| "loss": 0.0863, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.8600959179652708e-05, | |
| "loss": 0.0893, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.852824133626881e-05, | |
| "loss": 0.0946, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.8453830939723913e-05, | |
| "loss": 0.0782, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "learning_rate": 1.8377742757840246e-05, | |
| "loss": 0.0882, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 1.8299991891420848e-05, | |
| "loss": 0.0871, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.822059377125263e-05, | |
| "loss": 0.073, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.8139564155043885e-05, | |
| "loss": 0.0777, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.8056919124296957e-05, | |
| "loss": 0.0825, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 1.797267508111664e-05, | |
| "loss": 0.0732, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.788684874495491e-05, | |
| "loss": 0.079, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.7799457149292752e-05, | |
| "loss": 0.0681, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.7710517638259593e-05, | |
| "loss": 0.0683, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 1.76200478631911e-05, | |
| "loss": 0.0743, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 1.7528065779126035e-05, | |
| "loss": 0.0763, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.7434589641242814e-05, | |
| "loss": 0.067, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.7339638001236495e-05, | |
| "loss": 0.0667, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.7243229703636924e-05, | |
| "loss": 0.0703, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 1.714538388206878e-05, | |
| "loss": 0.0716, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 1.704611995545421e-05, | |
| "loss": 0.0513, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.694545762415887e-05, | |
| "loss": 0.0663, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.6843416866082118e-05, | |
| "loss": 0.0827, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 1.6740017932692073e-05, | |
| "loss": 0.0845, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 1.663528134500646e-05, | |
| "loss": 0.0791, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.6529227889519884e-05, | |
| "loss": 0.0596, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 1.642187861407847e-05, | |
| "loss": 0.0678, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.631325482370259e-05, | |
| "loss": 0.0822, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 1.6203378076358602e-05, | |
| "loss": 0.0595, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 1.609227017868033e-05, | |
| "loss": 0.0673, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.5979953181641246e-05, | |
| "loss": 0.0865, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "learning_rate": 1.5866449376178118e-05, | |
| "loss": 0.0639, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.5751781288767052e-05, | |
| "loss": 0.0703, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 1.56359716769528e-05, | |
| "loss": 0.0707, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 1.551904352483217e-05, | |
| "loss": 0.0527, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.540102003849253e-05, | |
| "loss": 0.0503, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 1.52819246414062e-05, | |
| "loss": 0.0625, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 1.5161780969781728e-05, | |
| "loss": 0.0628, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 1.5040612867872945e-05, | |
| "loss": 0.0685, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 1.4918444383246738e-05, | |
| "loss": 0.0839, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 1.479529976201044e-05, | |
| "loss": 0.0643, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 1.4671203443999847e-05, | |
| "loss": 0.068, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.4546180057928792e-05, | |
| "loss": 0.0494, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.4420254416501198e-05, | |
| "loss": 0.0942, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.4293451511486658e-05, | |
| "loss": 0.0798, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 1.416579650876043e-05, | |
| "loss": 0.0801, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.403731474330893e-05, | |
| "loss": 0.0556, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.3908031714201621e-05, | |
| "loss": 0.0655, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.3777973079530362e-05, | |
| "loss": 0.0596, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.3647164651317178e-05, | |
| "loss": 0.0463, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.35156323903915e-05, | |
| "loss": 0.0551, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.338340240123785e-05, | |
| "loss": 0.0386, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.3250500926815046e-05, | |
| "loss": 0.047, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.3116954343347882e-05, | |
| "loss": 0.0612, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.2982789155092407e-05, | |
| "loss": 0.0409, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 1.2848031989075754e-05, | |
| "loss": 0.0528, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.2712709589811629e-05, | |
| "loss": 0.0525, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.2576848813992475e-05, | |
| "loss": 0.0381, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.2440476625159363e-05, | |
| "loss": 0.0542, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 1.23036200883507e-05, | |
| "loss": 0.0405, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2166306364730766e-05, | |
| "loss": 0.055, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2028562706199201e-05, | |
| "loss": 0.0436, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.1890416449982451e-05, | |
| "loss": 0.0467, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "learning_rate": 1.1751895013208325e-05, | |
| "loss": 0.0693, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 1.1613025887464642e-05, | |
| "loss": 0.0495, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "learning_rate": 1.1473836633343145e-05, | |
| "loss": 0.0571, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 1.133435487496969e-05, | |
| "loss": 0.0331, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.1194608294521853e-05, | |
| "loss": 0.0342, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 1.1054624626734985e-05, | |
| "loss": 0.0679, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.0914431653397856e-05, | |
| "loss": 0.0509, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 1.0774057197838963e-05, | |
| "loss": 0.0543, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 1.0633529119404571e-05, | |
| "loss": 0.0893, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1.0492875307929643e-05, | |
| "loss": 0.0386, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 1.0352123678202686e-05, | |
| "loss": 0.0581, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 1.0211302164425657e-05, | |
| "loss": 0.0599, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 1.0070438714670004e-05, | |
| "loss": 0.0426, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "learning_rate": 9.929561285329998e-06, | |
| "loss": 0.0425, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 9.788697835574348e-06, | |
| "loss": 0.0571, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 9.647876321797314e-06, | |
| "loss": 0.0478, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 9.507124692070356e-06, | |
| "loss": 0.0626, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 9.366470880595434e-06, | |
| "loss": 0.0438, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 9.225942802161042e-06, | |
| "loss": 0.0385, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.085568346602146e-06, | |
| "loss": 0.0533, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 8.945375373265017e-06, | |
| "loss": 0.0568, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 8.805391705478149e-06, | |
| "loss": 0.0449, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 8.665645125030312e-06, | |
| "loss": 0.0579, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 8.526163366656858e-06, | |
| "loss": 0.0386, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 8.38697411253536e-06, | |
| "loss": 0.0435, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 8.248104986791677e-06, | |
| "loss": 0.0453, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "learning_rate": 8.10958355001755e-06, | |
| "loss": 0.0455, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 7.971437293800804e-06, | |
| "loss": 0.0434, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 7.833693635269235e-06, | |
| "loss": 0.0447, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 7.696379911649303e-06, | |
| "loss": 0.0376, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 7.559523374840639e-06, | |
| "loss": 0.0608, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "learning_rate": 7.423151186007527e-06, | |
| "loss": 0.0406, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 7.287290410188374e-06, | |
| "loss": 0.0457, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 7.1519680109242486e-06, | |
| "loss": 0.0456, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 7.017210844907598e-06, | |
| "loss": 0.0251, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 6.883045656652122e-06, | |
| "loss": 0.0294, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 3.09, | |
| "learning_rate": 6.749499073184957e-06, | |
| "loss": 0.0357, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 6.616597598762151e-06, | |
| "loss": 0.036, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 3.13, | |
| "learning_rate": 6.484367609608503e-06, | |
| "loss": 0.0287, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 6.352835348682824e-06, | |
| "loss": 0.0295, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 6.22202692046964e-06, | |
| "loss": 0.0306, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 6.09196828579838e-06, | |
| "loss": 0.0247, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 5.962685256691071e-06, | |
| "loss": 0.0372, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 5.834203491239574e-06, | |
| "loss": 0.0227, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 5.706548488513347e-06, | |
| "loss": 0.0257, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 5.579745583498802e-06, | |
| "loss": 0.0251, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 5.453819942071212e-06, | |
| "loss": 0.0247, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 5.328796556000153e-06, | |
| "loss": 0.0356, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 3.35, | |
| "learning_rate": 5.204700237989564e-06, | |
| "loss": 0.0303, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 5.081555616753264e-06, | |
| "loss": 0.02, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 3.39, | |
| "learning_rate": 4.959387132127054e-06, | |
| "loss": 0.0328, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 3.41, | |
| "learning_rate": 4.838219030218274e-06, | |
| "loss": 0.0264, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 3.43, | |
| "learning_rate": 4.718075358593802e-06, | |
| "loss": 0.0308, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 4.598979961507472e-06, | |
| "loss": 0.0358, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 4.48095647516783e-06, | |
| "loss": 0.0272, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 4.364028323047205e-06, | |
| "loss": 0.0306, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 4.248218711232952e-06, | |
| "loss": 0.0193, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 4.133550623821884e-06, | |
| "loss": 0.0243, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 4.0200468183587556e-06, | |
| "loss": 0.0359, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 3.90772982131967e-06, | |
| "loss": 0.0305, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 3.7966219236414036e-06, | |
| "loss": 0.0324, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 3.6867451762974117e-06, | |
| "loss": 0.0355, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 3.5781213859215334e-06, | |
| "loss": 0.0308, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 3.4707721104801175e-06, | |
| "loss": 0.0487, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 3.3647186549935407e-06, | |
| "loss": 0.0165, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 3.2599820673079286e-06, | |
| "loss": 0.03, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 3.1565831339178844e-06, | |
| "loss": 0.0225, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 3.0545423758411298e-06, | |
| "loss": 0.0364, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 2.953880044545795e-06, | |
| "loss": 0.0269, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 2.8546161179312247e-06, | |
| "loss": 0.0142, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 2.7567702963630805e-06, | |
| "loss": 0.0377, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 2.6603619987635087e-06, | |
| "loss": 0.0295, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 3.87, | |
| "learning_rate": 2.5654103587571887e-06, | |
| "loss": 0.031, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 2.4719342208739695e-06, | |
| "loss": 0.0387, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 2.379952136808903e-06, | |
| "loss": 0.0274, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 2.2894823617404107e-06, | |
| "loss": 0.0341, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 2.200542850707247e-06, | |
| "loss": 0.032, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 3.98, | |
| "learning_rate": 2.113151255045095e-06, | |
| "loss": 0.0333, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 2.0273249188833656e-06, | |
| "loss": 0.0298, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 1.9430808757030452e-06, | |
| "loss": 0.0201, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 1.860435844956121e-06, | |
| "loss": 0.0156, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 4.07, | |
| "learning_rate": 1.7794062287473734e-06, | |
| "loss": 0.021, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 4.09, | |
| "learning_rate": 1.7000081085791541e-06, | |
| "loss": 0.0197, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 1.622257242159756e-06, | |
| "loss": 0.029, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 1.5461690602760882e-06, | |
| "loss": 0.0249, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 4.15, | |
| "learning_rate": 1.4717586637311943e-06, | |
| "loss": 0.0294, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 4.17, | |
| "learning_rate": 1.3990408203472938e-06, | |
| "loss": 0.0112, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 1.3280299620348847e-06, | |
| "loss": 0.0172, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 4.22, | |
| "learning_rate": 1.258740181928524e-06, | |
| "loss": 0.0159, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 1.1911852315898465e-06, | |
| "loss": 0.0241, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 1.1253785182783571e-06, | |
| "loss": 0.0178, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "learning_rate": 1.061333102290576e-06, | |
| "loss": 0.0173, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 9.990616943680266e-07, | |
| "loss": 0.0149, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 4.33, | |
| "learning_rate": 9.385766531746055e-07, | |
| "loss": 0.0354, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 8.798899828438334e-07, | |
| "loss": 0.0179, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "learning_rate": 8.23013330596445e-07, | |
| "loss": 0.0145, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 7.679579844288509e-07, | |
| "loss": 0.0276, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 4.41, | |
| "learning_rate": 7.147348708728508e-07, | |
| "loss": 0.0117, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 6.633545528271213e-07, | |
| "loss": 0.0189, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 6.138272274608404e-07, | |
| "loss": 0.0257, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 5.661627241899193e-07, | |
| "loss": 0.0158, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 5.203705027262185e-07, | |
| "loss": 0.013, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 4.52, | |
| "learning_rate": 4.7645965120011627e-07, | |
| "loss": 0.0146, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 4.344388843568503e-07, | |
| "loss": 0.0208, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 3.943165418269401e-07, | |
| "loss": 0.0241, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 3.561005864710754e-07, | |
| "loss": 0.016, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 3.197986027997657e-07, | |
| "loss": 0.0222, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 2.8541779546808255e-07, | |
| "loss": 0.0155, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 2.529649878457985e-07, | |
| "loss": 0.0228, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 2.2244662066318146e-07, | |
| "loss": 0.0134, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 1.9386875073274636e-07, | |
| "loss": 0.0233, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 4.72, | |
| "learning_rate": 1.6723704974718758e-07, | |
| "loss": 0.0204, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 1.4255680315375164e-07, | |
| "loss": 0.0167, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 1.198329091052608e-07, | |
| "loss": 0.0147, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 9.906987748800945e-08, | |
| "loss": 0.0194, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 4.8, | |
| "learning_rate": 8.027182902670571e-08, | |
| "loss": 0.0162, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 4.83, | |
| "learning_rate": 6.344249446665673e-08, | |
| "loss": 0.0126, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 4.8585213833348686e-08, | |
| "loss": 0.0132, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 3.570293576956596e-08, | |
| "loss": 0.0175, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 2.479821695019813e-08, | |
| "loss": 0.0136, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 4.91, | |
| "learning_rate": 1.587322157482252e-08, | |
| "loss": 0.0182, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 8.929720938193331e-09, | |
| "loss": 0.017, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "learning_rate": 3.9690930786995266e-09, | |
| "loss": 0.0134, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "learning_rate": 9.923225048724672e-10, | |
| "loss": 0.0196, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0165, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 230, | |
| "total_flos": 5455864135680.0, | |
| "train_loss": 0.06894507200821587, | |
| "train_runtime": 1542.9779, | |
| "train_samples_per_second": 9.482, | |
| "train_steps_per_second": 0.149 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 230, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 800, | |
| "total_flos": 5455864135680.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |