{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 124,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016129032258064516,
      "grad_norm": 10.87326189953438,
      "learning_rate": 9.998395376482152e-06,
      "loss": 0.6565,
      "step": 1
    },
    {
      "epoch": 0.03225806451612903,
      "grad_norm": 7.269831695103879,
      "learning_rate": 9.993582535855265e-06,
      "loss": 0.5083,
      "step": 2
    },
    {
      "epoch": 0.04838709677419355,
      "grad_norm": 7.331143011763737,
      "learning_rate": 9.985564567238237e-06,
      "loss": 0.4943,
      "step": 3
    },
    {
      "epoch": 0.06451612903225806,
      "grad_norm": 3.145851041669267,
      "learning_rate": 9.974346616959476e-06,
      "loss": 0.3969,
      "step": 4
    },
    {
      "epoch": 0.08064516129032258,
      "grad_norm": 3.7578378276180078,
      "learning_rate": 9.959935885253715e-06,
      "loss": 0.3808,
      "step": 5
    },
    {
      "epoch": 0.0967741935483871,
      "grad_norm": 2.442098952552494,
      "learning_rate": 9.942341621640558e-06,
      "loss": 0.3468,
      "step": 6
    },
    {
      "epoch": 0.11290322580645161,
      "grad_norm": 2.4754735598965647,
      "learning_rate": 9.921575118987672e-06,
      "loss": 0.3702,
      "step": 7
    },
    {
      "epoch": 0.12903225806451613,
      "grad_norm": 1.9218939275098026,
      "learning_rate": 9.897649706262474e-06,
      "loss": 0.2935,
      "step": 8
    },
    {
      "epoch": 0.14516129032258066,
      "grad_norm": 2.152286809206915,
      "learning_rate": 9.870580739976936e-06,
      "loss": 0.3071,
      "step": 9
    },
    {
      "epoch": 0.16129032258064516,
      "grad_norm": 1.905715976330252,
      "learning_rate": 9.840385594331022e-06,
      "loss": 0.3161,
      "step": 10
    },
    {
      "epoch": 0.1774193548387097,
      "grad_norm": 1.8502142488743858,
      "learning_rate": 9.807083650061063e-06,
      "loss": 0.271,
      "step": 11
    },
    {
      "epoch": 0.1935483870967742,
      "grad_norm": 1.602752217280902,
      "learning_rate": 9.770696282000245e-06,
      "loss": 0.2749,
      "step": 12
    },
    {
      "epoch": 0.20967741935483872,
      "grad_norm": 1.7972465700238842,
      "learning_rate": 9.731246845359187e-06,
      "loss": 0.3073,
      "step": 13
    },
    {
      "epoch": 0.22580645161290322,
      "grad_norm": 1.404340750757187,
      "learning_rate": 9.688760660735403e-06,
      "loss": 0.2449,
      "step": 14
    },
    {
      "epoch": 0.24193548387096775,
      "grad_norm": 1.5470572330505397,
      "learning_rate": 9.643264997861312e-06,
      "loss": 0.2763,
      "step": 15
    },
    {
      "epoch": 0.25806451612903225,
      "grad_norm": 1.752677221357735,
      "learning_rate": 9.594789058101154e-06,
      "loss": 0.307,
      "step": 16
    },
    {
      "epoch": 0.27419354838709675,
      "grad_norm": 1.397073903020116,
      "learning_rate": 9.543363955708124e-06,
      "loss": 0.2621,
      "step": 17
    },
    {
      "epoch": 0.2903225806451613,
      "grad_norm": 1.4387774568099785,
      "learning_rate": 9.48902269785371e-06,
      "loss": 0.2426,
      "step": 18
    },
    {
      "epoch": 0.3064516129032258,
      "grad_norm": 1.469664537291017,
      "learning_rate": 9.431800163442043e-06,
      "loss": 0.2629,
      "step": 19
    },
    {
      "epoch": 0.3225806451612903,
      "grad_norm": 1.4981049280532812,
      "learning_rate": 9.371733080722911e-06,
      "loss": 0.2337,
      "step": 20
    },
    {
      "epoch": 0.3387096774193548,
      "grad_norm": 1.2694332381323004,
      "learning_rate": 9.308860003717748e-06,
      "loss": 0.2411,
      "step": 21
    },
    {
      "epoch": 0.3548387096774194,
      "grad_norm": 1.4461722434727646,
      "learning_rate": 9.243221287473755e-06,
      "loss": 0.2433,
      "step": 22
    },
    {
      "epoch": 0.3709677419354839,
      "grad_norm": 1.6405537394754068,
      "learning_rate": 9.174859062162037e-06,
      "loss": 0.2531,
      "step": 23
    },
    {
      "epoch": 0.3870967741935484,
      "grad_norm": 1.4728123340180916,
      "learning_rate": 9.103817206036383e-06,
      "loss": 0.2493,
      "step": 24
    },
    {
      "epoch": 0.4032258064516129,
      "grad_norm": 1.5428779319352142,
      "learning_rate": 9.030141317270026e-06,
      "loss": 0.2747,
      "step": 25
    },
    {
      "epoch": 0.41935483870967744,
      "grad_norm": 1.4014771667599129,
      "learning_rate": 8.953878684688492e-06,
      "loss": 0.2616,
      "step": 26
    },
    {
      "epoch": 0.43548387096774194,
      "grad_norm": 1.270425344867005,
      "learning_rate": 8.875078257417294e-06,
      "loss": 0.2393,
      "step": 27
    },
    {
      "epoch": 0.45161290322580644,
      "grad_norm": 1.6102760335061446,
      "learning_rate": 8.793790613463956e-06,
      "loss": 0.2709,
      "step": 28
    },
    {
      "epoch": 0.46774193548387094,
      "grad_norm": 1.093876386150566,
      "learning_rate": 8.710067927254555e-06,
      "loss": 0.2076,
      "step": 29
    },
    {
      "epoch": 0.4838709677419355,
      "grad_norm": 1.6926395905502454,
      "learning_rate": 8.6239639361456e-06,
      "loss": 0.2762,
      "step": 30
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.1878187545622536,
      "learning_rate": 8.535533905932739e-06,
      "loss": 0.2147,
      "step": 31
    },
    {
      "epoch": 0.5161290322580645,
      "grad_norm": 1.2217468281963282,
      "learning_rate": 8.444834595378434e-06,
      "loss": 0.229,
      "step": 32
    },
    {
      "epoch": 0.532258064516129,
      "grad_norm": 1.4312811620366488,
      "learning_rate": 8.351924219781393e-06,
      "loss": 0.2713,
      "step": 33
    },
    {
      "epoch": 0.5483870967741935,
      "grad_norm": 1.3385614095697285,
      "learning_rate": 8.256862413611113e-06,
      "loss": 0.229,
      "step": 34
    },
    {
      "epoch": 0.5645161290322581,
      "grad_norm": 1.203181733820383,
      "learning_rate": 8.15971019223152e-06,
      "loss": 0.2152,
      "step": 35
    },
    {
      "epoch": 0.5806451612903226,
      "grad_norm": 1.2963561972002815,
      "learning_rate": 8.060529912738316e-06,
      "loss": 0.2294,
      "step": 36
    },
    {
      "epoch": 0.5967741935483871,
      "grad_norm": 1.2964579182538771,
      "learning_rate": 7.959385233935087e-06,
      "loss": 0.2239,
      "step": 37
    },
    {
      "epoch": 0.6129032258064516,
      "grad_norm": 1.425630530409184,
      "learning_rate": 7.856341075473963e-06,
      "loss": 0.2547,
      "step": 38
    },
    {
      "epoch": 0.6290322580645161,
      "grad_norm": 1.1591186088115961,
      "learning_rate": 7.751463576186957e-06,
      "loss": 0.2054,
      "step": 39
    },
    {
      "epoch": 0.6451612903225806,
      "grad_norm": 1.3037938468273347,
      "learning_rate": 7.644820051634813e-06,
      "loss": 0.2314,
      "step": 40
    },
    {
      "epoch": 0.6612903225806451,
      "grad_norm": 1.2396439904545034,
      "learning_rate": 7.536478950900537e-06,
      "loss": 0.2292,
      "step": 41
    },
    {
      "epoch": 0.6774193548387096,
      "grad_norm": 1.0809122026567421,
      "learning_rate": 7.4265098126554065e-06,
      "loss": 0.1987,
      "step": 42
    },
    {
      "epoch": 0.6935483870967742,
      "grad_norm": 1.3070787735028482,
      "learning_rate": 7.314983220525604e-06,
      "loss": 0.2421,
      "step": 43
    },
    {
      "epoch": 0.7096774193548387,
      "grad_norm": 1.191802079545008,
      "learning_rate": 7.201970757788172e-06,
      "loss": 0.2236,
      "step": 44
    },
    {
      "epoch": 0.7258064516129032,
      "grad_norm": 1.2671048702368255,
      "learning_rate": 7.087544961425317e-06,
      "loss": 0.2349,
      "step": 45
    },
    {
      "epoch": 0.7419354838709677,
      "grad_norm": 1.3193301564380906,
      "learning_rate": 6.971779275566593e-06,
      "loss": 0.2262,
      "step": 46
    },
    {
      "epoch": 0.7580645161290323,
      "grad_norm": 1.1724695158551681,
      "learning_rate": 6.85474800434884e-06,
      "loss": 0.2239,
      "step": 47
    },
    {
      "epoch": 0.7741935483870968,
      "grad_norm": 1.299611161115463,
      "learning_rate": 6.736526264224101e-06,
      "loss": 0.2253,
      "step": 48
    },
    {
      "epoch": 0.7903225806451613,
      "grad_norm": 1.2046675779312723,
      "learning_rate": 6.617189935746191e-06,
      "loss": 0.2342,
      "step": 49
    },
    {
      "epoch": 0.8064516129032258,
      "grad_norm": 1.2114394723006852,
      "learning_rate": 6.496815614866792e-06,
      "loss": 0.2432,
      "step": 50
    },
    {
      "epoch": 0.8225806451612904,
      "grad_norm": 1.0422176168525288,
      "learning_rate": 6.375480563772391e-06,
      "loss": 0.214,
      "step": 51
    },
    {
      "epoch": 0.8387096774193549,
      "grad_norm": 1.002561327631847,
      "learning_rate": 6.2532626612936035e-06,
      "loss": 0.1789,
      "step": 52
    },
    {
      "epoch": 0.8548387096774194,
      "grad_norm": 1.1776027205410509,
      "learning_rate": 6.130240352918675e-06,
      "loss": 0.2147,
      "step": 53
    },
    {
      "epoch": 0.8709677419354839,
      "grad_norm": 1.0373468306422096,
      "learning_rate": 6.006492600443301e-06,
      "loss": 0.1862,
      "step": 54
    },
    {
      "epoch": 0.8870967741935484,
      "grad_norm": 1.1492045148364676,
      "learning_rate": 5.882098831289044e-06,
      "loss": 0.2197,
      "step": 55
    },
    {
      "epoch": 0.9032258064516129,
      "grad_norm": 1.1894276739450094,
      "learning_rate": 5.757138887522884e-06,
      "loss": 0.2183,
      "step": 56
    },
    {
      "epoch": 0.9193548387096774,
      "grad_norm": 1.0692220586170038,
      "learning_rate": 5.631692974610647e-06,
      "loss": 0.1927,
      "step": 57
    },
    {
      "epoch": 0.9354838709677419,
      "grad_norm": 1.0863181585349602,
      "learning_rate": 5.505841609937162e-06,
      "loss": 0.2123,
      "step": 58
    },
    {
      "epoch": 0.9516129032258065,
      "grad_norm": 1.2067302924444834,
      "learning_rate": 5.379665571126232e-06,
      "loss": 0.2143,
      "step": 59
    },
    {
      "epoch": 0.967741935483871,
      "grad_norm": 1.1481248875401728,
      "learning_rate": 5.253245844193564e-06,
      "loss": 0.2253,
      "step": 60
    },
    {
      "epoch": 0.9838709677419355,
      "grad_norm": 1.138471948906496,
      "learning_rate": 5.12666357156594e-06,
      "loss": 0.2106,
      "step": 61
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.9877188308540851,
      "learning_rate": 5e-06,
      "loss": 0.1912,
      "step": 62
    },
    {
      "epoch": 1.0161290322580645,
      "grad_norm": 1.285710509845996,
      "learning_rate": 4.873336428434062e-06,
      "loss": 0.1512,
      "step": 63
    },
    {
      "epoch": 1.032258064516129,
      "grad_norm": 1.1987875037281694,
      "learning_rate": 4.746754155806437e-06,
      "loss": 0.1593,
      "step": 64
    },
    {
      "epoch": 1.0483870967741935,
      "grad_norm": 0.9358488255328534,
      "learning_rate": 4.62033442887377e-06,
      "loss": 0.1161,
      "step": 65
    },
    {
      "epoch": 1.064516129032258,
      "grad_norm": 1.0135885793535764,
      "learning_rate": 4.49415839006284e-06,
      "loss": 0.1437,
      "step": 66
    },
    {
      "epoch": 1.0806451612903225,
      "grad_norm": 0.9654734992075913,
      "learning_rate": 4.368307025389355e-06,
      "loss": 0.1293,
      "step": 67
    },
    {
      "epoch": 1.096774193548387,
      "grad_norm": 1.054312778877148,
      "learning_rate": 4.2428611124771184e-06,
      "loss": 0.1263,
      "step": 68
    },
    {
      "epoch": 1.1129032258064515,
      "grad_norm": 1.1755907513825516,
      "learning_rate": 4.11790116871096e-06,
      "loss": 0.1443,
      "step": 69
    },
    {
      "epoch": 1.129032258064516,
      "grad_norm": 1.1483703491854127,
      "learning_rate": 3.993507399556699e-06,
      "loss": 0.1177,
      "step": 70
    },
    {
      "epoch": 1.1451612903225807,
      "grad_norm": 1.2129688068259314,
      "learning_rate": 3.869759647081326e-06,
      "loss": 0.1282,
      "step": 71
    },
    {
      "epoch": 1.1612903225806452,
      "grad_norm": 1.57231259303588,
      "learning_rate": 3.7467373387063973e-06,
      "loss": 0.1581,
      "step": 72
    },
    {
      "epoch": 1.1774193548387097,
      "grad_norm": 1.169915355030969,
      "learning_rate": 3.62451943622761e-06,
      "loss": 0.1244,
      "step": 73
    },
    {
      "epoch": 1.1935483870967742,
      "grad_norm": 1.0829020767897761,
      "learning_rate": 3.5031843851332105e-06,
      "loss": 0.0994,
      "step": 74
    },
    {
      "epoch": 1.2096774193548387,
      "grad_norm": 1.2819470593965914,
      "learning_rate": 3.3828100642538097e-06,
      "loss": 0.1249,
      "step": 75
    },
    {
      "epoch": 1.2258064516129032,
      "grad_norm": 1.2986543510798598,
      "learning_rate": 3.2634737357758994e-06,
      "loss": 0.1319,
      "step": 76
    },
    {
      "epoch": 1.2419354838709677,
      "grad_norm": 1.0877827005941145,
      "learning_rate": 3.145251995651162e-06,
      "loss": 0.1189,
      "step": 77
    },
    {
      "epoch": 1.2580645161290323,
      "grad_norm": 1.239579004020731,
      "learning_rate": 3.0282207244334084e-06,
      "loss": 0.1306,
      "step": 78
    },
    {
      "epoch": 1.2741935483870968,
      "grad_norm": 1.094394586160559,
      "learning_rate": 2.912455038574686e-06,
      "loss": 0.1046,
      "step": 79
    },
    {
      "epoch": 1.2903225806451613,
      "grad_norm": 1.3077538993469884,
      "learning_rate": 2.7980292422118282e-06,
      "loss": 0.1387,
      "step": 80
    },
    {
      "epoch": 1.3064516129032258,
      "grad_norm": 1.0583821119272765,
      "learning_rate": 2.6850167794743966e-06,
      "loss": 0.1157,
      "step": 81
    },
    {
      "epoch": 1.3225806451612903,
      "grad_norm": 1.0174256798878376,
      "learning_rate": 2.573490187344596e-06,
      "loss": 0.1098,
      "step": 82
    },
    {
      "epoch": 1.3387096774193548,
      "grad_norm": 0.9925876427433509,
      "learning_rate": 2.4635210490994648e-06,
      "loss": 0.1038,
      "step": 83
    },
    {
      "epoch": 1.3548387096774195,
      "grad_norm": 1.0100889117374643,
      "learning_rate": 2.3551799483651894e-06,
      "loss": 0.1128,
      "step": 84
    },
    {
      "epoch": 1.370967741935484,
      "grad_norm": 1.1463148298547765,
      "learning_rate": 2.2485364238130435e-06,
      "loss": 0.1325,
      "step": 85
    },
    {
      "epoch": 1.3870967741935485,
      "grad_norm": 1.038646259222713,
      "learning_rate": 2.1436589245260375e-06,
      "loss": 0.1238,
      "step": 86
    },
    {
      "epoch": 1.403225806451613,
      "grad_norm": 1.0872748948208963,
      "learning_rate": 2.040614766064913e-06,
      "loss": 0.1234,
      "step": 87
    },
    {
      "epoch": 1.4193548387096775,
      "grad_norm": 1.1491099700687,
      "learning_rate": 1.9394700872616856e-06,
      "loss": 0.1243,
      "step": 88
    },
    {
      "epoch": 1.435483870967742,
      "grad_norm": 1.084504576902654,
      "learning_rate": 1.8402898077684806e-06,
      "loss": 0.1054,
      "step": 89
    },
    {
      "epoch": 1.4516129032258065,
      "grad_norm": 1.0261489922524454,
      "learning_rate": 1.74313758638889e-06,
      "loss": 0.1095,
      "step": 90
    },
    {
      "epoch": 1.467741935483871,
      "grad_norm": 0.9988753974390192,
      "learning_rate": 1.648075780218607e-06,
      "loss": 0.1077,
      "step": 91
    },
    {
      "epoch": 1.4838709677419355,
      "grad_norm": 1.0086070477288833,
      "learning_rate": 1.555165404621567e-06,
      "loss": 0.1122,
      "step": 92
    },
    {
      "epoch": 1.5,
      "grad_norm": 1.022320165417706,
      "learning_rate": 1.4644660940672628e-06,
      "loss": 0.1162,
      "step": 93
    },
    {
      "epoch": 1.5161290322580645,
      "grad_norm": 1.2262545163733904,
      "learning_rate": 1.3760360638544012e-06,
      "loss": 0.1546,
      "step": 94
    },
    {
      "epoch": 1.532258064516129,
      "grad_norm": 1.0626317180159468,
      "learning_rate": 1.2899320727454472e-06,
      "loss": 0.1187,
      "step": 95
    },
    {
      "epoch": 1.5483870967741935,
      "grad_norm": 0.9985594277028694,
      "learning_rate": 1.2062093865360458e-06,
      "loss": 0.1044,
      "step": 96
    },
    {
      "epoch": 1.564516129032258,
      "grad_norm": 1.0166569057705444,
      "learning_rate": 1.1249217425827063e-06,
      "loss": 0.1123,
      "step": 97
    },
    {
      "epoch": 1.5806451612903225,
      "grad_norm": 1.1467229722350185,
      "learning_rate": 1.046121315311508e-06,
      "loss": 0.1362,
      "step": 98
    },
    {
      "epoch": 1.596774193548387,
      "grad_norm": 1.0671195863505887,
      "learning_rate": 9.69858682729976e-07,
      "loss": 0.1279,
      "step": 99
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 0.9711179264510111,
      "learning_rate": 8.961827939636198e-07,
      "loss": 0.116,
      "step": 100
    },
    {
      "epoch": 1.629032258064516,
      "grad_norm": 0.977694859124101,
      "learning_rate": 8.251409378379638e-07,
      "loss": 0.111,
      "step": 101
    },
    {
      "epoch": 1.6451612903225805,
      "grad_norm": 0.9786280370033354,
      "learning_rate": 7.567787125262449e-07,
      "loss": 0.1122,
      "step": 102
    },
    {
      "epoch": 1.661290322580645,
      "grad_norm": 1.0263905054452944,
      "learning_rate": 6.911399962822518e-07,
      "loss": 0.1195,
      "step": 103
    },
    {
      "epoch": 1.6774193548387095,
      "grad_norm": 1.0605409621143818,
      "learning_rate": 6.282669192770896e-07,
      "loss": 0.1162,
      "step": 104
    },
    {
      "epoch": 1.6935483870967742,
      "grad_norm": 0.9224415638360666,
      "learning_rate": 5.681998365579594e-07,
      "loss": 0.0991,
      "step": 105
    },
    {
      "epoch": 1.7096774193548387,
      "grad_norm": 1.0983830331964775,
      "learning_rate": 5.109773021462921e-07,
      "loss": 0.1118,
      "step": 106
    },
    {
      "epoch": 1.7258064516129032,
      "grad_norm": 0.9664038127573374,
      "learning_rate": 4.5663604429187547e-07,
      "loss": 0.1034,
      "step": 107
    },
    {
      "epoch": 1.7419354838709677,
      "grad_norm": 1.2888701500097894,
      "learning_rate": 4.05210941898847e-07,
      "loss": 0.1227,
      "step": 108
    },
    {
      "epoch": 1.7580645161290323,
      "grad_norm": 1.0240750263592804,
      "learning_rate": 3.567350021386895e-07,
      "loss": 0.1018,
      "step": 109
    },
    {
      "epoch": 1.7741935483870968,
      "grad_norm": 1.0406579684790231,
      "learning_rate": 3.112393392645985e-07,
      "loss": 0.1078,
      "step": 110
    },
    {
      "epoch": 1.7903225806451613,
      "grad_norm": 1.1019997789127864,
      "learning_rate": 2.6875315464081566e-07,
      "loss": 0.123,
      "step": 111
    },
    {
      "epoch": 1.8064516129032258,
      "grad_norm": 1.225451816815891,
      "learning_rate": 2.2930371799975593e-07,
      "loss": 0.1243,
      "step": 112
    },
    {
      "epoch": 1.8225806451612905,
      "grad_norm": 1.1306931074173066,
      "learning_rate": 1.9291634993893803e-07,
      "loss": 0.1308,
      "step": 113
    },
    {
      "epoch": 1.838709677419355,
      "grad_norm": 1.0585635247370584,
      "learning_rate": 1.5961440566897913e-07,
      "loss": 0.1189,
      "step": 114
    },
    {
      "epoch": 1.8548387096774195,
      "grad_norm": 1.0966669364700545,
      "learning_rate": 1.2941926002306536e-07,
      "loss": 0.1256,
      "step": 115
    },
    {
      "epoch": 1.870967741935484,
      "grad_norm": 0.975066404712545,
      "learning_rate": 1.0235029373752758e-07,
      "loss": 0.1002,
      "step": 116
    },
    {
      "epoch": 1.8870967741935485,
      "grad_norm": 0.9503457863613388,
      "learning_rate": 7.842488101232893e-08,
      "loss": 0.0993,
      "step": 117
    },
    {
      "epoch": 1.903225806451613,
      "grad_norm": 0.9610477555034781,
      "learning_rate": 5.7658378359443104e-08,
      "loss": 0.1045,
      "step": 118
    },
    {
      "epoch": 1.9193548387096775,
      "grad_norm": 0.9827078500106823,
      "learning_rate": 4.006411474628491e-08,
      "loss": 0.105,
      "step": 119
    },
    {
      "epoch": 1.935483870967742,
      "grad_norm": 0.8859224629862705,
      "learning_rate": 2.5653383040524228e-08,
      "loss": 0.101,
      "step": 120
    },
    {
      "epoch": 1.9516129032258065,
      "grad_norm": 0.9475236405052518,
      "learning_rate": 1.4435432761762958e-08,
      "loss": 0.1031,
      "step": 121
    },
    {
      "epoch": 1.967741935483871,
      "grad_norm": 1.1222128012665822,
      "learning_rate": 6.417464144736208e-09,
      "loss": 0.1334,
      "step": 122
    },
    {
      "epoch": 1.9838709677419355,
      "grad_norm": 0.9058624463928132,
      "learning_rate": 1.6046235178474034e-09,
      "loss": 0.0977,
      "step": 123
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.9425911821452717,
      "learning_rate": 0.0,
      "loss": 0.0889,
      "step": 124
    },
    {
      "epoch": 2.0,
      "step": 124,
      "total_flos": 6490769326080.0,
      "train_loss": 0.1912058055881531,
      "train_runtime": 510.5024,
      "train_samples_per_second": 1.939,
      "train_steps_per_second": 0.243
    }
  ],
  "logging_steps": 1,
  "max_steps": 124,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 70000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6490769326080.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}