| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 124, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016129032258064516, |
| "grad_norm": 10.15686507609796, |
| "learning_rate": 9.998395376482152e-06, |
| "loss": 0.7678, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03225806451612903, |
| "grad_norm": 5.96419649390907, |
| "learning_rate": 9.993582535855265e-06, |
| "loss": 0.6603, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.04838709677419355, |
| "grad_norm": 3.1301448423281513, |
| "learning_rate": 9.985564567238237e-06, |
| "loss": 0.5276, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.06451612903225806, |
| "grad_norm": 2.38982751185715, |
| "learning_rate": 9.974346616959476e-06, |
| "loss": 0.4648, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.08064516129032258, |
| "grad_norm": 3.274343982377678, |
| "learning_rate": 9.959935885253715e-06, |
| "loss": 0.4326, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0967741935483871, |
| "grad_norm": 2.181012552893853, |
| "learning_rate": 9.942341621640558e-06, |
| "loss": 0.4031, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.11290322580645161, |
| "grad_norm": 1.8368080271945009, |
| "learning_rate": 9.921575118987672e-06, |
| "loss": 0.4226, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.12903225806451613, |
| "grad_norm": 1.548568376752524, |
| "learning_rate": 9.897649706262474e-06, |
| "loss": 0.3443, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.14516129032258066, |
| "grad_norm": 1.446205206337782, |
| "learning_rate": 9.870580739976936e-06, |
| "loss": 0.3426, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.16129032258064516, |
| "grad_norm": 1.4569275973035858, |
| "learning_rate": 9.840385594331022e-06, |
| "loss": 0.3543, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1774193548387097, |
| "grad_norm": 1.1765730690143612, |
| "learning_rate": 9.807083650061063e-06, |
| "loss": 0.299, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.1935483870967742, |
| "grad_norm": 1.2316824294179551, |
| "learning_rate": 9.770696282000245e-06, |
| "loss": 0.2997, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.20967741935483872, |
| "grad_norm": 1.3782255139384922, |
| "learning_rate": 9.731246845359187e-06, |
| "loss": 0.3402, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.22580645161290322, |
| "grad_norm": 1.0467453809126217, |
| "learning_rate": 9.688760660735403e-06, |
| "loss": 0.2645, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.24193548387096775, |
| "grad_norm": 1.2125294248521037, |
| "learning_rate": 9.643264997861312e-06, |
| "loss": 0.2942, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.25806451612903225, |
| "grad_norm": 1.285158067414639, |
| "learning_rate": 9.594789058101154e-06, |
| "loss": 0.3295, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.27419354838709675, |
| "grad_norm": 0.9919695419144787, |
| "learning_rate": 9.543363955708124e-06, |
| "loss": 0.2725, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.2903225806451613, |
| "grad_norm": 1.1113906841527212, |
| "learning_rate": 9.48902269785371e-06, |
| "loss": 0.252, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.3064516129032258, |
| "grad_norm": 1.1098542171382924, |
| "learning_rate": 9.431800163442043e-06, |
| "loss": 0.2723, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.3225806451612903, |
| "grad_norm": 0.9829186173171938, |
| "learning_rate": 9.371733080722911e-06, |
| "loss": 0.2437, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.3387096774193548, |
| "grad_norm": 0.9262260680308071, |
| "learning_rate": 9.308860003717748e-06, |
| "loss": 0.2569, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.3548387096774194, |
| "grad_norm": 1.0035515387872362, |
| "learning_rate": 9.243221287473755e-06, |
| "loss": 0.2535, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.3709677419354839, |
| "grad_norm": 1.0837571968151323, |
| "learning_rate": 9.174859062162037e-06, |
| "loss": 0.2678, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.3870967741935484, |
| "grad_norm": 1.0231542315640096, |
| "learning_rate": 9.103817206036383e-06, |
| "loss": 0.2588, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.4032258064516129, |
| "grad_norm": 1.1318934340163525, |
| "learning_rate": 9.030141317270026e-06, |
| "loss": 0.2869, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.41935483870967744, |
| "grad_norm": 0.9400452093313983, |
| "learning_rate": 8.953878684688492e-06, |
| "loss": 0.2719, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.43548387096774194, |
| "grad_norm": 0.9308181327082331, |
| "learning_rate": 8.875078257417294e-06, |
| "loss": 0.2446, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.45161290322580644, |
| "grad_norm": 1.1338746247861013, |
| "learning_rate": 8.793790613463956e-06, |
| "loss": 0.2907, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.46774193548387094, |
| "grad_norm": 0.8853691664655057, |
| "learning_rate": 8.710067927254555e-06, |
| "loss": 0.2195, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.4838709677419355, |
| "grad_norm": 1.1461819517748095, |
| "learning_rate": 8.6239639361456e-06, |
| "loss": 0.2872, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.8593719094740551, |
| "learning_rate": 8.535533905932739e-06, |
| "loss": 0.2196, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.5161290322580645, |
| "grad_norm": 0.9428267171030292, |
| "learning_rate": 8.444834595378434e-06, |
| "loss": 0.2337, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.532258064516129, |
| "grad_norm": 1.0048900337705848, |
| "learning_rate": 8.351924219781393e-06, |
| "loss": 0.2783, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.5483870967741935, |
| "grad_norm": 0.8833694607124812, |
| "learning_rate": 8.256862413611113e-06, |
| "loss": 0.2371, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.5645161290322581, |
| "grad_norm": 0.8770851001997254, |
| "learning_rate": 8.15971019223152e-06, |
| "loss": 0.2212, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.5806451612903226, |
| "grad_norm": 0.8726868292374229, |
| "learning_rate": 8.060529912738316e-06, |
| "loss": 0.2363, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.5967741935483871, |
| "grad_norm": 0.8723848398490387, |
| "learning_rate": 7.959385233935087e-06, |
| "loss": 0.226, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.6129032258064516, |
| "grad_norm": 1.041040575509601, |
| "learning_rate": 7.856341075473963e-06, |
| "loss": 0.259, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.6290322580645161, |
| "grad_norm": 0.8472904589042461, |
| "learning_rate": 7.751463576186957e-06, |
| "loss": 0.2127, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 0.9304598631680553, |
| "learning_rate": 7.644820051634813e-06, |
| "loss": 0.2347, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6612903225806451, |
| "grad_norm": 0.8868255569756148, |
| "learning_rate": 7.536478950900537e-06, |
| "loss": 0.2324, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.6774193548387096, |
| "grad_norm": 0.8168353033321583, |
| "learning_rate": 7.4265098126554065e-06, |
| "loss": 0.2039, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.6935483870967742, |
| "grad_norm": 0.9439571933236675, |
| "learning_rate": 7.314983220525604e-06, |
| "loss": 0.2457, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.7096774193548387, |
| "grad_norm": 0.9203901483290133, |
| "learning_rate": 7.201970757788172e-06, |
| "loss": 0.2284, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.7258064516129032, |
| "grad_norm": 0.9133040367678048, |
| "learning_rate": 7.087544961425317e-06, |
| "loss": 0.2361, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.7419354838709677, |
| "grad_norm": 0.9912019630972784, |
| "learning_rate": 6.971779275566593e-06, |
| "loss": 0.2281, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.7580645161290323, |
| "grad_norm": 0.8897497720584243, |
| "learning_rate": 6.85474800434884e-06, |
| "loss": 0.2326, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.7741935483870968, |
| "grad_norm": 0.9328471365917465, |
| "learning_rate": 6.736526264224101e-06, |
| "loss": 0.2298, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.7903225806451613, |
| "grad_norm": 0.8982184452558108, |
| "learning_rate": 6.617189935746191e-06, |
| "loss": 0.2381, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.8064516129032258, |
| "grad_norm": 0.9378153254036978, |
| "learning_rate": 6.496815614866792e-06, |
| "loss": 0.2553, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.8225806451612904, |
| "grad_norm": 0.8258829857538255, |
| "learning_rate": 6.375480563772391e-06, |
| "loss": 0.2224, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.8387096774193549, |
| "grad_norm": 0.774298426938682, |
| "learning_rate": 6.2532626612936035e-06, |
| "loss": 0.1871, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.8548387096774194, |
| "grad_norm": 0.8677012412591526, |
| "learning_rate": 6.130240352918675e-06, |
| "loss": 0.2206, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.8709677419354839, |
| "grad_norm": 0.7862953781240192, |
| "learning_rate": 6.006492600443301e-06, |
| "loss": 0.1909, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.8870967741935484, |
| "grad_norm": 0.8984616494521016, |
| "learning_rate": 5.882098831289044e-06, |
| "loss": 0.2247, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.9032258064516129, |
| "grad_norm": 0.899752233099467, |
| "learning_rate": 5.757138887522884e-06, |
| "loss": 0.2234, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.9193548387096774, |
| "grad_norm": 0.7958478757018235, |
| "learning_rate": 5.631692974610647e-06, |
| "loss": 0.1977, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.9354838709677419, |
| "grad_norm": 0.8899807298926051, |
| "learning_rate": 5.505841609937162e-06, |
| "loss": 0.2189, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.9516129032258065, |
| "grad_norm": 0.9987199589944712, |
| "learning_rate": 5.379665571126232e-06, |
| "loss": 0.2221, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.967741935483871, |
| "grad_norm": 0.8717675589644842, |
| "learning_rate": 5.253245844193564e-06, |
| "loss": 0.2283, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.9838709677419355, |
| "grad_norm": 0.8798287573122808, |
| "learning_rate": 5.12666357156594e-06, |
| "loss": 0.2163, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8445748362742429, |
| "learning_rate": 5e-06, |
| "loss": 0.201, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.0161290322580645, |
| "grad_norm": 0.994941720797875, |
| "learning_rate": 4.873336428434062e-06, |
| "loss": 0.173, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.032258064516129, |
| "grad_norm": 0.9458245363242225, |
| "learning_rate": 4.746754155806437e-06, |
| "loss": 0.1825, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.0483870967741935, |
| "grad_norm": 0.7257849679114303, |
| "learning_rate": 4.62033442887377e-06, |
| "loss": 0.1314, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.064516129032258, |
| "grad_norm": 0.8075549406882644, |
| "learning_rate": 4.49415839006284e-06, |
| "loss": 0.1682, |
| "step": 66 |
| }, |
| { |
| "epoch": 1.0806451612903225, |
| "grad_norm": 0.7819735159921916, |
| "learning_rate": 4.368307025389355e-06, |
| "loss": 0.1527, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.096774193548387, |
| "grad_norm": 0.8674902114444599, |
| "learning_rate": 4.2428611124771184e-06, |
| "loss": 0.1519, |
| "step": 68 |
| }, |
| { |
| "epoch": 1.1129032258064515, |
| "grad_norm": 0.9275180488064847, |
| "learning_rate": 4.11790116871096e-06, |
| "loss": 0.169, |
| "step": 69 |
| }, |
| { |
| "epoch": 1.129032258064516, |
| "grad_norm": 0.7673142286792733, |
| "learning_rate": 3.993507399556699e-06, |
| "loss": 0.1444, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.1451612903225807, |
| "grad_norm": 0.8969793366098866, |
| "learning_rate": 3.869759647081326e-06, |
| "loss": 0.1538, |
| "step": 71 |
| }, |
| { |
| "epoch": 1.1612903225806452, |
| "grad_norm": 1.2280000886563323, |
| "learning_rate": 3.7467373387063973e-06, |
| "loss": 0.1862, |
| "step": 72 |
| }, |
| { |
| "epoch": 1.1774193548387097, |
| "grad_norm": 0.8709617059435755, |
| "learning_rate": 3.62451943622761e-06, |
| "loss": 0.1499, |
| "step": 73 |
| }, |
| { |
| "epoch": 1.1935483870967742, |
| "grad_norm": 0.7823129242110545, |
| "learning_rate": 3.5031843851332105e-06, |
| "loss": 0.1242, |
| "step": 74 |
| }, |
| { |
| "epoch": 1.2096774193548387, |
| "grad_norm": 0.912675988682136, |
| "learning_rate": 3.3828100642538097e-06, |
| "loss": 0.1429, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.2258064516129032, |
| "grad_norm": 1.0407617625209924, |
| "learning_rate": 3.2634737357758994e-06, |
| "loss": 0.1587, |
| "step": 76 |
| }, |
| { |
| "epoch": 1.2419354838709677, |
| "grad_norm": 0.9001406768249398, |
| "learning_rate": 3.145251995651162e-06, |
| "loss": 0.143, |
| "step": 77 |
| }, |
| { |
| "epoch": 1.2580645161290323, |
| "grad_norm": 0.9588790722109072, |
| "learning_rate": 3.0282207244334084e-06, |
| "loss": 0.1619, |
| "step": 78 |
| }, |
| { |
| "epoch": 1.2741935483870968, |
| "grad_norm": 0.8739427027300898, |
| "learning_rate": 2.912455038574686e-06, |
| "loss": 0.1274, |
| "step": 79 |
| }, |
| { |
| "epoch": 1.2903225806451613, |
| "grad_norm": 1.017730166512566, |
| "learning_rate": 2.7980292422118282e-06, |
| "loss": 0.167, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.3064516129032258, |
| "grad_norm": 0.8214114701452577, |
| "learning_rate": 2.6850167794743966e-06, |
| "loss": 0.137, |
| "step": 81 |
| }, |
| { |
| "epoch": 1.3225806451612903, |
| "grad_norm": 0.813408070361553, |
| "learning_rate": 2.573490187344596e-06, |
| "loss": 0.1345, |
| "step": 82 |
| }, |
| { |
| "epoch": 1.3387096774193548, |
| "grad_norm": 0.7916370700075542, |
| "learning_rate": 2.4635210490994648e-06, |
| "loss": 0.1237, |
| "step": 83 |
| }, |
| { |
| "epoch": 1.3548387096774195, |
| "grad_norm": 0.8759460853242588, |
| "learning_rate": 2.3551799483651894e-06, |
| "loss": 0.1458, |
| "step": 84 |
| }, |
| { |
| "epoch": 1.370967741935484, |
| "grad_norm": 0.9542077854110009, |
| "learning_rate": 2.2485364238130435e-06, |
| "loss": 0.1611, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.3870967741935485, |
| "grad_norm": 0.8545037869947797, |
| "learning_rate": 2.1436589245260375e-06, |
| "loss": 0.1438, |
| "step": 86 |
| }, |
| { |
| "epoch": 1.403225806451613, |
| "grad_norm": 0.9675154332284962, |
| "learning_rate": 2.040614766064913e-06, |
| "loss": 0.1629, |
| "step": 87 |
| }, |
| { |
| "epoch": 1.4193548387096775, |
| "grad_norm": 0.8487524763895334, |
| "learning_rate": 1.9394700872616856e-06, |
| "loss": 0.1533, |
| "step": 88 |
| }, |
| { |
| "epoch": 1.435483870967742, |
| "grad_norm": 0.7731676370643805, |
| "learning_rate": 1.8402898077684806e-06, |
| "loss": 0.132, |
| "step": 89 |
| }, |
| { |
| "epoch": 1.4516129032258065, |
| "grad_norm": 0.8043274157206313, |
| "learning_rate": 1.74313758638889e-06, |
| "loss": 0.1382, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.467741935483871, |
| "grad_norm": 0.849535665349673, |
| "learning_rate": 1.648075780218607e-06, |
| "loss": 0.1329, |
| "step": 91 |
| }, |
| { |
| "epoch": 1.4838709677419355, |
| "grad_norm": 0.7984362048371111, |
| "learning_rate": 1.555165404621567e-06, |
| "loss": 0.1419, |
| "step": 92 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.8382729160325499, |
| "learning_rate": 1.4644660940672628e-06, |
| "loss": 0.1504, |
| "step": 93 |
| }, |
| { |
| "epoch": 1.5161290322580645, |
| "grad_norm": 0.9940697507002278, |
| "learning_rate": 1.3760360638544012e-06, |
| "loss": 0.1897, |
| "step": 94 |
| }, |
| { |
| "epoch": 1.532258064516129, |
| "grad_norm": 0.8745718465092378, |
| "learning_rate": 1.2899320727454472e-06, |
| "loss": 0.1507, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.5483870967741935, |
| "grad_norm": 0.8103194335716973, |
| "learning_rate": 1.2062093865360458e-06, |
| "loss": 0.1411, |
| "step": 96 |
| }, |
| { |
| "epoch": 1.564516129032258, |
| "grad_norm": 0.841758121990542, |
| "learning_rate": 1.1249217425827063e-06, |
| "loss": 0.1449, |
| "step": 97 |
| }, |
| { |
| "epoch": 1.5806451612903225, |
| "grad_norm": 0.9097959698405236, |
| "learning_rate": 1.046121315311508e-06, |
| "loss": 0.1664, |
| "step": 98 |
| }, |
| { |
| "epoch": 1.596774193548387, |
| "grad_norm": 0.8797544299430834, |
| "learning_rate": 9.69858682729976e-07, |
| "loss": 0.1599, |
| "step": 99 |
| }, |
| { |
| "epoch": 1.6129032258064515, |
| "grad_norm": 0.8094378510555039, |
| "learning_rate": 8.961827939636198e-07, |
| "loss": 0.146, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.629032258064516, |
| "grad_norm": 0.8337407546752948, |
| "learning_rate": 8.251409378379638e-07, |
| "loss": 0.1458, |
| "step": 101 |
| }, |
| { |
| "epoch": 1.6451612903225805, |
| "grad_norm": 0.8381952627625123, |
| "learning_rate": 7.567787125262449e-07, |
| "loss": 0.1459, |
| "step": 102 |
| }, |
| { |
| "epoch": 1.661290322580645, |
| "grad_norm": 0.8654177303491782, |
| "learning_rate": 6.911399962822518e-07, |
| "loss": 0.152, |
| "step": 103 |
| }, |
| { |
| "epoch": 1.6774193548387095, |
| "grad_norm": 0.8725448319864291, |
| "learning_rate": 6.282669192770896e-07, |
| "loss": 0.146, |
| "step": 104 |
| }, |
| { |
| "epoch": 1.6935483870967742, |
| "grad_norm": 0.7602570626222729, |
| "learning_rate": 5.681998365579594e-07, |
| "loss": 0.1236, |
| "step": 105 |
| }, |
| { |
| "epoch": 1.7096774193548387, |
| "grad_norm": 0.897016178134926, |
| "learning_rate": 5.109773021462921e-07, |
| "loss": 0.1364, |
| "step": 106 |
| }, |
| { |
| "epoch": 1.7258064516129032, |
| "grad_norm": 0.7481978415933923, |
| "learning_rate": 4.5663604429187547e-07, |
| "loss": 0.1313, |
| "step": 107 |
| }, |
| { |
| "epoch": 1.7419354838709677, |
| "grad_norm": 0.8647131233347217, |
| "learning_rate": 4.05210941898847e-07, |
| "loss": 0.1495, |
| "step": 108 |
| }, |
| { |
| "epoch": 1.7580645161290323, |
| "grad_norm": 0.8293570453259962, |
| "learning_rate": 3.567350021386895e-07, |
| "loss": 0.1344, |
| "step": 109 |
| }, |
| { |
| "epoch": 1.7741935483870968, |
| "grad_norm": 0.8666108700735493, |
| "learning_rate": 3.112393392645985e-07, |
| "loss": 0.1393, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.7903225806451613, |
| "grad_norm": 0.93316130446411, |
| "learning_rate": 2.6875315464081566e-07, |
| "loss": 0.1548, |
| "step": 111 |
| }, |
| { |
| "epoch": 1.8064516129032258, |
| "grad_norm": 0.9639975841844778, |
| "learning_rate": 2.2930371799975593e-07, |
| "loss": 0.1548, |
| "step": 112 |
| }, |
| { |
| "epoch": 1.8225806451612905, |
| "grad_norm": 0.8842105543843218, |
| "learning_rate": 1.9291634993893803e-07, |
| "loss": 0.1582, |
| "step": 113 |
| }, |
| { |
| "epoch": 1.838709677419355, |
| "grad_norm": 0.8940772924089719, |
| "learning_rate": 1.5961440566897913e-07, |
| "loss": 0.1567, |
| "step": 114 |
| }, |
| { |
| "epoch": 1.8548387096774195, |
| "grad_norm": 0.9288648599580145, |
| "learning_rate": 1.2941926002306536e-07, |
| "loss": 0.1633, |
| "step": 115 |
| }, |
| { |
| "epoch": 1.870967741935484, |
| "grad_norm": 0.7878410862418067, |
| "learning_rate": 1.0235029373752758e-07, |
| "loss": 0.1283, |
| "step": 116 |
| }, |
| { |
| "epoch": 1.8870967741935485, |
| "grad_norm": 0.7702795658600806, |
| "learning_rate": 7.842488101232893e-08, |
| "loss": 0.1312, |
| "step": 117 |
| }, |
| { |
| "epoch": 1.903225806451613, |
| "grad_norm": 0.8516665684692306, |
| "learning_rate": 5.7658378359443104e-08, |
| "loss": 0.1429, |
| "step": 118 |
| }, |
| { |
| "epoch": 1.9193548387096775, |
| "grad_norm": 0.8320783595823822, |
| "learning_rate": 4.006411474628491e-08, |
| "loss": 0.134, |
| "step": 119 |
| }, |
| { |
| "epoch": 1.935483870967742, |
| "grad_norm": 0.7845298483605121, |
| "learning_rate": 2.5653383040524228e-08, |
| "loss": 0.1349, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.9516129032258065, |
| "grad_norm": 0.7988882748756444, |
| "learning_rate": 1.4435432761762958e-08, |
| "loss": 0.1329, |
| "step": 121 |
| }, |
| { |
| "epoch": 1.967741935483871, |
| "grad_norm": 0.8959642076550108, |
| "learning_rate": 6.417464144736208e-09, |
| "loss": 0.1647, |
| "step": 122 |
| }, |
| { |
| "epoch": 1.9838709677419355, |
| "grad_norm": 0.756336009916006, |
| "learning_rate": 1.6046235178474034e-09, |
| "loss": 0.124, |
| "step": 123 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.7745110773259603, |
| "learning_rate": 0.0, |
| "loss": 0.1213, |
| "step": 124 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 124, |
| "total_flos": 4268929056768.0, |
| "train_loss": 0.21466370521774214, |
| "train_runtime": 241.9786, |
| "train_samples_per_second": 4.091, |
| "train_steps_per_second": 0.512 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 124, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 70000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4268929056768.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|